#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *m,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&B));
  PetscCall(PetscObjectCompose((PetscObject)A,"MatGetRowIJ_MPIAIJ",(PetscObject)B));
  PetscCall(MatGetRowIJ(B,oshift,symmetric,inodecompressed,m,ia,ja,done));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *m,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)A,"MatGetRowIJ_MPIAIJ",(PetscObject*)&B));
  PetscCall(MatRestoreRowIJ(B,oshift,symmetric,inodecompressed,m,ia,ja,done));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(0);
}

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes:
   Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type
   also automatically switches over to use inodes when enough of them exist.

   Level: beginner

.seealso: `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
M*/
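/*
  Example usage (a sketch, not part of this file): create an AIJ matrix and call both
  preallocation routines, as recommended above, so the same code runs unchanged on one
  process or many. M, N, and the per-row nonzero counts below are placeholders.

    Mat A;

    PetscCall(MatCreate(PETSC_COMM_WORLD,&A));
    PetscCall(MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N));
    PetscCall(MatSetType(A,MATAIJ));
    PetscCall(MatSeqAIJSetPreallocation(A,5,NULL));          ignored when run on more than one process
    PetscCall(MatMPIAIJSetPreallocation(A,5,NULL,2,NULL));   ignored when run on one process
*/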
/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/

static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A,flg));
  if (a->B) PetscCall(MatBindToCPU(a->B,flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from the parent matrix. */
  if (a->lvec) PetscCall(VecBindToCPU(a->lvec,flg));
  if (a->diag) PetscCall(VecBindToCPU(a->diag,flg));

  PetscFunctionReturn(0);
}

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    PetscCall(MatSetBlockSizes(mat->A,rbs,cbs));
    PetscCall(MatSetBlockSizes(mat->B,rbs,1));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb,*aav,*bav;
  PetscInt        na,nb,i,j,*rows,cnt = 0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A,&aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B,&bav));
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = bav + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  PetscCall(MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav));
    PetscFunctionReturn(0);
  }
  PetscCall(PetscMalloc1(M->rmap->n-cnt,&rows));
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = bav + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)Y->data;
  PetscBool  cong;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(Y,&cong));
  if (Y->assembled && cong) {
    PetscCall(MatDiagonalSet(aij->A,D,is));
  } else {
    PetscCall(MatDiagonalSet_Default(Y,D,is));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data;
  PetscInt   i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows));
  PetscCall(MatGetOwnershipRange(M,&rstart,NULL));
  for (i=0; i<nrows; i++) rows[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows));
  PetscFunctionReturn(0);
}
PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)A->data;
  PetscInt          i,m,n,*garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ*)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A,&m,&n));
  PetscCall(PetscCalloc1(n,&work));
  PetscCall(MatSeqAIJGetArrayRead(aij->A,&dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B,&dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&dummy));
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]),work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
    }
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(0);
}
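/*
  Example (sketch): MatGetColumnReductions_MPIAIJ() is normally reached through public
  interfaces such as MatGetColumnNorms(). A minimal caller, assuming A is an assembled
  parallel matrix:

    PetscReal *norms;
    PetscInt  N;

    PetscCall(MatGetSize(A,NULL,&N));
    PetscCall(PetscMalloc1(N,&norms));
    PetscCall(MatGetColumnNorms(A,NORM_2,norms));   every rank receives all N column norms
    PetscCall(PetscFree(norms));
*/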
PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  IS             sis,gis;
  const PetscInt *isis,*igis;
  PetscInt       n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  PetscCall(MatFindOffBlockDiagonalEntries(a->A,&sis));
  PetscCall(MatFindNonzeroRows(a->B,&gis));
  PetscCall(ISGetSize(gis,&ngis));
  PetscCall(ISGetSize(sis,&nsis));
  PetscCall(ISGetIndices(sis,&isis));
  PetscCall(ISGetIndices(gis,&igis));

  PetscCall(PetscMalloc1(ngis+nsis,&iis));
  PetscCall(PetscArraycpy(iis,igis,ngis));
  PetscCall(PetscArraycpy(iis+ngis,isis,nsis));
  n = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n,iis));
  PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
  for (i=0; i<n; i++) iis[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is));

  PetscCall(ISRestoreIndices(sis,&isis));
  PetscCall(ISRestoreIndices(gis,&igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is defined this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each process
  has an order-N integer array) but is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscInt   n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableCreate(n,mat->cmap->N+1,&aij->colmap));
  for (i=0; i<n; i++) {
    PetscCall(PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES));
  }
#else
  PetscCall(PetscCalloc1(mat->cmap->N+1,&aij->colmap));
  PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt)));
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
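/*
  Example (sketch): once the colmap above exists, a global column index gcol is translated
  to a local column of the off-diagonal block B the same way MatSetValues_MPIAIJ() does it
  below; aij and gcol are assumed to be in scope. Entries are stored shifted by one so that
  a lookup result of 0 can mean "not present".

    PetscInt lcol;
  #if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableFind(aij->colmap,gcol+1,&lcol));
    lcol--;
  #else
    lcol = aij->colmap[gcol] - 1;
  #endif
    A negative lcol now means gcol does not occur in the off-diagonal part on this rank.
*/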
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure whether PetscLogFlops() will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } \
        else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1)); \
    PetscCall(PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
a_noinsert: ; \
    ailen[row] = nrow1; \
  }

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } \
        else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1)); \
    PetscCall(PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
b_noinsert: ; \
    bilen[row] = nrow2; \
  }
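/*
  The two macros above share one pattern: a binary search that narrows the sorted column
  list of a row, a linear scan that either updates an existing entry or finds the insertion
  point, and a tail shift for true insertions. A standalone sketch of that logic follows
  (RowInsertOrAdd is a hypothetical helper, not used by PETSc; it assumes the row already
  has capacity for one more entry and omits the macros' per-row search caching):

    static inline void RowInsertOrAdd(PetscInt *cols,PetscScalar *vals,PetscInt *nrow,PetscInt col,PetscScalar v,InsertMode addv)
    {
      PetscInt i,t,lo = 0,hi = *nrow;
      while (hi - lo > 5) {                 narrow the range by bisection
        t = (lo + hi)/2;
        if (cols[t] > col) hi = t;
        else lo = t;
      }
      for (i = lo; i < hi; i++) {
        if (cols[i] > col) break;           cols is sorted, so this is the insertion point
        if (cols[i] == col) {               existing entry: update in place
          if (addv == ADD_VALUES) vals[i] += v;
          else vals[i] = v;
          return;
        }
      }
      for (t = *nrow; t > i; t--) {         shift the tail up by one slot
        cols[t] = cols[t-1];
        vals[t] = vals[t-1];
      }
      cols[i] = col;
      vals[i] = v;
      (*nrow)++;
    }
*/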
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscInt    l,*garray = mat->garray,diag;
  PetscScalar *aa,*ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A,&diag,NULL));
  row = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B,&ba));
    PetscCall(PetscArraycpy(ba+b->i[row],v,l));
    PetscCall(MatSeqAIJRestoreArray(mat->B,&ba));
  }

  /* diagonal part */
  if (a->i[row+1]-a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A,&aa));
    PetscCall(PetscArraycpy(aa+a->i[row],v+l,(a->i[row+1]-a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A,&aa));
  }

  /* right of diagonal part */
  if (b->i[row+1]-b->i[row]-l) {
    PetscCall(MatSeqAIJGetArray(mat->B,&ba));
    PetscCall(PetscArraycpy(ba+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l));
    PetscCall(MatSeqAIJRestoreArray(mat->B,&ba));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar value = 0.0;
  PetscInt    i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A = aij->A;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)A->data;
  PetscInt    *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B = aij->B;
  Mat_SeqAIJ  *b = (Mat_SeqAIJ*)B->data;
  PetscInt    *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar   *aa,*ba;
  PetscInt    *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt    nonew;
  MatScalar   *ap1,*ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A,&aa));
  PetscCall(MatSeqAIJGetArray(B,&ba));
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) value = roworiented ? v[i*n+j] : v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
          if (mat->was_assembled) {
            if (!aij->colmap) {
              PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
            }
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat)); /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat,"Skipping insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n",(double)PetscRealPart(value),im[i],in[j]));
              } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A,&aa)); /* aa, ba might have been freed due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B,&ba));
  PetscFunctionReturn(0);
}
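/*
  Example (sketch): rows owned by another rank are not inserted by the routine above but
  placed in the stash, and reach their owner only during assembly. A minimal caller (row,
  col, and the value are placeholders; row may be owned by any rank):

    PetscScalar v = 1.0;
    PetscInt    row = 0,col = 0;

    PetscCall(MatSetValues(A,1,&row,1,&col,&v,ADD_VALUES));
    PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));   stashed entries are communicated here
    PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
*/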
/*
  This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat        A = aij->A; /* diagonal part of the matrix */
  Mat        B = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt   *ailen = a->ilen,*aj = a->j;
  PetscInt   *bilen = b->ilen,*bj = b->j;
  PetscInt   am = aij->A->rmap->n,j;
  PetscInt   diag_so_far = 0,dnz;
  PetscInt   offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
  Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
  would not be true and the more complex MatSetValues_MPIAIJ() has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat         A = aij->A; /* diagonal part of the matrix */
  Mat         B = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ  *aijd = (Mat_SeqAIJ*)(aij->A)->data,*aijo = (Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ  *b = (Mat_SeqAIJ*)B->data;
  PetscInt    cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt    *ailen = a->ilen,*aj = a->j;
  PetscInt    *bilen = b->ilen,*bj = b->j;
  PetscInt    am = aij->A->rmap->n,j;
  PetscInt    *full_diag_i = aijd->i,*full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt    col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
  PetscScalar *aa = a->a,*ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag+dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd+onz_row] = mat_j[col];
        ba[rowstart_offd+onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(0);
}
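/*
  Worked example for the two routines above: with column ownership range [cstart,cend) = [4,8),
  a CSR row with global columns {1, 4, 6, 9} is split into

    diagonal block A:     local columns {0, 2}  (4-cstart and 6-cstart), ailen[row] = 2
    off-diagonal block B: global columns {1, 9},                         bilen[row] = 2

  B keeps global column indices at this stage; they are compacted to local indices later by
  MatSetUpMultiply_MPIAIJ().
*/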
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscInt   i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col = idxn[j] - cstart;
          PetscCall(MatGetValues(aij->A,1,&row,1,&col,v+i*n+j));
        } else {
          if (!aij->colmap) {
            PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
          }
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscTableFind(aij->colmap,idxn[j]+1,&col));
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            PetscCall(MatGetValues(aij->B,1,&row,1,&col,v+i*n+j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscInt   nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
  PetscCall(PetscInfo(aij->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  PetscMPIInt n;
  PetscInt    i,j,rstart,ncols,flg;
  PetscInt    *row,*col;
  PetscBool   other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else ncols = n-i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A,PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B,PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A,mode));
  PetscCall(MatAssemblyEnd(aij->A,mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  }
  PetscCall(MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B,mode));
  PetscCall(MatAssemblyEnd(aij->B,mode));

  PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));

  aij->rowvalues = NULL;

  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ       *mat = (Mat_MPIAIJ*)A->data;
  PetscObjectState sA,sB;
  PetscInt         *lrows;
  PetscInt         r,len;
  PetscBool        cong,lch,gch;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows));
  PetscCall(MatHasCongruentLayouts(A,&cong));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    PetscCall(VecGetArrayRead(x,&xx));
    PetscCall(VecGetArray(b,&bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x,&xx));
    PetscCall(VecRestoreArray(b,&bb));
  }

  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    PetscCall(MatZeroRows(mat->A,len,lrows,diag,NULL,NULL));
    PetscCall(MatZeroRows(mat->B,len,lrows,0.0,NULL,NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   nnwA,nnwB;
    PetscBool  nnzA,nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A,len,lrows,0.0,NULL,NULL));
    PetscCall(MatZeroRows(mat->B,len,lrows,0.0,NULL,NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;
      PetscCall(MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES));
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A,len,lrows,0.0,NULL,NULL));
    PetscCall(MatZeroRows(mat->B,len,lrows,0.0,NULL,NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}
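/*
  Example (sketch): a typical boundary-condition use of the routine above through the public
  MatZeroRows() interface. rows/nrows are placeholders listing global row indices (any rank
  may list any row); with x and b supplied, b is fixed so that the zeroed equations read
  diag*x on the right-hand side.

    PetscCall(MatZeroRows(A,nrows,rows,1.0,x,b));
*/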
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscMPIInt       p = 0;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask,*aij_a;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj,*ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n,&lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N,&rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap,idx,&p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
  PetscCall(PetscSFSetGraph(sf,n,N,NULL,PETSC_OWN_POINTER,rrows,PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf,MPIU_INT,(PetscInt*)rows,lrows,MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf,MPIU_INT,(PetscInt*)rows,lrows,MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A,len,lrows,diag,x,b));
  /* handle off-diagonal part of matrix */
  PetscCall(MatCreateVecs(A,&xmask,NULL));
  PetscCall(VecDuplicate(l->lvec,&lmask));
  PetscCall(VecGetArray(xmask,&bb));
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask,&bb));
  PetscCall(VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A,&cong));
    PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    PetscCall(VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec,&xx));
    PetscCall(VecGetArray(b,&bb));
  }
  PetscCall(VecGetArray(lmask,&mask));
  /* remove zeroed rows of off-diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B,&aij_a));
  ii = aij->i;
  for (i=0; i<len; i++) {
    PetscCall(PetscArrayzero(aij_a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]));
  }
  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b,&bb));
    PetscCall(VecRestoreArrayRead(l->lvec,&xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B,&aij_a));
  PetscCall(VecRestoreArray(lmask,&mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  PetscInt   nt;
  VecScatter Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx,&nt));
  PetscCheck(nt == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")",A->cmap->n,nt);
  PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*a->A->ops->mult)(a->A,xx,yy));
  PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B,a->lvec,yy,yy));
  PetscFunctionReturn(0);
}
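/*
  The multiply above computes yy = A_d*xx + B_o*x_ghost, where A_d is the diagonal block,
  B_o the off-diagonal block, and x_ghost the ghost values gathered into a->lvec. The
  scatter is deliberately split so communication overlaps the local product:

    VecScatterBegin(...)    start moving the needed ghost entries of xx
    yy  = A_d*xx            local work while messages are in flight
    VecScatterEnd(...)      finish the scatter
    yy += B_o*lvec          apply the off-process coupling
*/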
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatMultDiagonalBlock(a->A,bb,xx));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  VecScatter Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A,xx,yy,zz));
  PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B,a->lvec,zz,zz));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A,xx,yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm    comm;
  Mat_MPIAIJ  *Aij = (Mat_MPIAIJ*)Amat->data,*Bij;
  Mat         Adia = Aij->A,Bdia,Aoff,Boff,*Aoffs,*Boffs;
  IS          Me,Notme;
  PetscInt    M,N,first,last,*notme,i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*)Bmat->data; Bdia = Bij->A;
  PetscCall(MatIsTranspose(Adia,Bdia,tol,&lf));
  PetscCall(MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(0);
  PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat,&M,&N));
  PetscCall(MatGetOwnershipRange(Amat,&first,&last));
  PetscCall(PetscMalloc1(N-last+first,&notme));
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me));
  PetscCall(MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff,Boff,tol,f));
  PetscCall(MatDestroyMatrices(1,&Aoffs));
  PetscCall(MatDestroyMatrices(1,&Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(0);
}
PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
{
  PetscFunctionBegin;
  PetscCall(MatIsTranspose_MPIAIJ(A,A,tol,f));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A,xx,yy,zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  PetscCall(MatGetDiagonal(a->A,v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A,aa));
  PetscCall(MatScale(a->B,aa));
  PetscFunctionReturn(0);
}

/* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscSFDestroy(&aij->coo_sf));
  PetscCall(PetscFree(aij->Aperm1));
  PetscCall(PetscFree(aij->Bperm1));
  PetscCall(PetscFree(aij->Ajmap1));
  PetscCall(PetscFree(aij->Bjmap1));

  PetscCall(PetscFree(aij->Aimap2));
  PetscCall(PetscFree(aij->Bimap2));
  PetscCall(PetscFree(aij->Aperm2));
  PetscCall(PetscFree(aij->Bperm2));
  PetscCall(PetscFree(aij->Ajmap2));
  PetscCall(PetscFree(aij->Bjmap2));

  PetscCall(PetscFree2(aij->sendbuf,aij->recvbuf));
  PetscCall(PetscFree(aij->Cperm1));
  PetscFunctionReturn(0);
}
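/*
  Example (sketch): the arrays freed above are created by MatSetPreallocationCOO() and used
  by MatSetValuesCOO(). A minimal caller, with ncoo, coo_i, coo_j, and vals as placeholders:

    PetscCall(MatSetPreallocationCOO(A,ncoo,coo_i,coo_j));   analyze the pattern once
    PetscCall(MatSetValuesCOO(A,vals,INSERT_VALUES));        can be repeated with new values
*/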
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A));
  PetscCall(MatDestroy(&aij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&aij->colmap));
#else
  PetscCall(PetscFree(aij->colmap));
#endif
  PetscCall(PetscFree(aij->garray));
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));
  PetscCall(PetscFree(aij->ld));

  /* Free COO */
  PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));

  PetscCall(PetscFree(mat->data));

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  PetscCall(PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL));
#endif
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL));
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetPreallocationCOO_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetValuesCOO_C",NULL));
  PetscFunctionReturn(0);
}
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ        *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ        *B = (Mat_SeqAIJ*)aij->B->data;
  const PetscInt    *garray = aij->garray;
  const PetscScalar *aa,*ba;
  PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
  PetscInt          *rowlens;
  PetscInt          *colidxs;
  PetscScalar       *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz;

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M; header[2] = N; header[3] = nz;
  PetscCallMPI(MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat)));
  PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m,&rowlens));
  for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  PetscCall(PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz,&colidxs));
  for (cnt=0, i=0; i<m; i++) {
    for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      colidxs[cnt++] = garray[B->j[jb]];
    }
    for (ja=A->i[i]; ja<A->i[i+1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
    for (; jb<B->i[i+1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
  }
  PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values */
  PetscCall(MatSeqAIJGetArrayRead(aij->A,&aa));
  PetscCall(MatSeqAIJGetArrayRead(aij->B,&ba));
  PetscCall(PetscMalloc1(nz,&matvals));
  for (cnt=0, i=0; i<m; i++) {
    for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      matvals[cnt++] = ba[jb];
    }
    for (ja=A->i[i]; ja<A->i[i+1]; ja++) matvals[cnt++] = aa[ja];
    for (; jb<B->i[i+1]; jb++) matvals[cnt++] = ba[jb];
  }
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&aa));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&ba));
  PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat,viewer));
  PetscFunctionReturn(0);
}
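/*
  The binary layout written above (and read back by MatLoad()) is, in order:

    header[4]    {MAT_FILE_CLASSID, M, N, total nonzeros}
    rowlens[M]   number of nonzeros in each global row
    colidxs[nz]  global column indices, each row in ascending column order
    matvals[nz]  the corresponding values

  The per-row merge loops emit the B entries with global column below cs, then all A entries,
  then the remaining B entries, so each row comes out sorted by global column.
*/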
#include <petscdraw.h>
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*)(aij->A->data))->nz + ((Mat_SeqAIJ*)(aij->B->data))->nz;
      PetscCall(PetscMalloc1(size,&nz));
      PetscCallMPI(MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat)));
      for (i=0; i<(PetscInt)size; i++) {
        nmax  = PetscMax(nmax,nz[i]);
        nmin  = PetscMin(nmin,nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg/size;
      PetscCall(PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n",nmin,navg,nmax));
      PetscFunctionReturn(0);
    }
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo  info;
      PetscInt *inodes = NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
      PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
      PetscCall(MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n",
                                                     rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n",
                                                     rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx,viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n",inodecount,inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name));
      PetscCall(MatView(aij->A,viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat,viewer));
    }
    PetscFunctionReturn(0);
  } else if (iascii && size == 1) {
    PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name));
    PetscCall(MatView(aij->A,viewer));
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
    PetscCall(PetscDrawIsNull(draw,&isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL,Av;
    IS  isrow,iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A));
    PetscCall(MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL));
    /* The commented code uses MatCreateSubMatrices instead */
    /*
      Mat *AA, A = NULL, Av;
      IS  isrow,iscol;

      PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
      PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
      PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
      if (rank == 0) {
        PetscCall(PetscObjectReference((PetscObject)AA[0]));
        A  = AA[0];
        Av = AA[0];
      }
      PetscCall(MatDestroySubMatrices(1,&AA));
    */
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) {
        PetscCall(PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name));
      }
      PetscCall(MatView_SeqAIJ(Av,sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscBool iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
  if (iascii || isdraw || isbinary || issocket) {
    PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer));
  }
  PetscFunctionReturn(0);
}
PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1434 PetscFunctionReturn(0); 1435 } 1436 1437 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1438 PetscCall(VecDuplicate(bb,&bb1)); 1439 } 1440 1441 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1442 if (flag & SOR_ZERO_INITIAL_GUESS) { 1443 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1444 its--; 1445 } 1446 1447 while (its--) { 1448 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1449 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1450 1451 /* update rhs: bb1 = bb - B*x */ 1452 PetscCall(VecScale(mat->lvec,-1.0)); 1453 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 1454 1455 /* local sweep */ 1456 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx)); 1457 } 1458 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1459 if (flag & SOR_ZERO_INITIAL_GUESS) { 1460 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1461 its--; 1462 } 1463 while (its--) { 1464 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1465 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1466 1467 /* update rhs: bb1 = bb - B*x */ 1468 PetscCall(VecScale(mat->lvec,-1.0)); 1469 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 1470 1471 /* local sweep */ 1472 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx)); 1473 } 1474 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1475 if (flag & SOR_ZERO_INITIAL_GUESS) { 1476 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1477 its--; 1478 } 1479 while (its--) { 1480 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1481 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1482 1483 /* update rhs: bb1 = bb - B*x */ 1484 PetscCall(VecScale(mat->lvec,-1.0)); 1485 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 1486 1487 /* local sweep */ 1488 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx)); 1489 } 1490 } else if (flag & SOR_EISENSTAT) { 1491 Vec xx1; 1492 1493 PetscCall(VecDuplicate(bb,&xx1)); 1494 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx)); 1495 1496 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1497 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1498 if (!mat->diag) { 1499 PetscCall(MatCreateVecs(matin,&mat->diag,NULL)); 1500 PetscCall(MatGetDiagonal(matin,mat->diag)); 1501 } 1502 PetscCall(MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop)); 1503 if (hasop) { 1504 PetscCall(MatMultDiagonalBlock(matin,xx,bb1)); 1505 } else { 1506 PetscCall(VecPointwiseMult(bb1,mat->diag,xx)); 1507 } 1508 PetscCall(VecAYPX(bb1,(omega-2.0)/omega,bb)); 1509 1510 PetscCall(MatMultAdd(mat->B,mat->lvec,bb1,bb1)); 1511 1512 /* local sweep */ 1513 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1)); 1514 PetscCall(VecAXPY(xx,1.0,xx1)); 1515 PetscCall(VecDestroy(&xx1)); 1516 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1517 1518 PetscCall(VecDestroy(&bb1)); 1519 1520 matin->factorerrortype = 
mat->A->factorerrortype; 1521 PetscFunctionReturn(0); 1522 } 1523 1524 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1525 { 1526 Mat aA,aB,Aperm; 1527 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1528 PetscScalar *aa,*ba; 1529 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1530 PetscSF rowsf,sf; 1531 IS parcolp = NULL; 1532 PetscBool done; 1533 1534 PetscFunctionBegin; 1535 PetscCall(MatGetLocalSize(A,&m,&n)); 1536 PetscCall(ISGetIndices(rowp,&rwant)); 1537 PetscCall(ISGetIndices(colp,&cwant)); 1538 PetscCall(PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest)); 1539 1540 /* Invert row permutation to find out where my rows should go */ 1541 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf)); 1542 PetscCall(PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant)); 1543 PetscCall(PetscSFSetFromOptions(rowsf)); 1544 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1545 PetscCall(PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE)); 1546 PetscCall(PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE)); 1547 1548 /* Invert column permutation to find out where my columns should go */ 1549 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf)); 1550 PetscCall(PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant)); 1551 PetscCall(PetscSFSetFromOptions(sf)); 1552 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1553 PetscCall(PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE)); 1554 PetscCall(PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE)); 1555 PetscCall(PetscSFDestroy(&sf)); 1556 1557 PetscCall(ISRestoreIndices(rowp,&rwant)); 1558 PetscCall(ISRestoreIndices(colp,&cwant)); 1559 PetscCall(MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols)); 1560 1561 /* Find out where my gcols should go */ 1562 PetscCall(MatGetSize(aB,NULL,&ng)); 1563 PetscCall(PetscMalloc1(ng,&gcdest)); 1564 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf)); 1565 PetscCall(PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols)); 1566 PetscCall(PetscSFSetFromOptions(sf)); 1567 PetscCall(PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE)); 1568 PetscCall(PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE)); 1569 PetscCall(PetscSFDestroy(&sf)); 1570 1571 PetscCall(PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz)); 1572 PetscCall(MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done)); 1573 PetscCall(MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done)); 1574 for (i=0; i<m; i++) { 1575 PetscInt row = rdest[i]; 1576 PetscMPIInt rowner; 1577 PetscCall(PetscLayoutFindOwner(A->rmap,row,&rowner)); 1578 for (j=ai[i]; j<ai[i+1]; j++) { 1579 PetscInt col = cdest[aj[j]]; 1580 PetscMPIInt cowner; 1581 PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner)); /* Could build an index for the columns to eliminate this search */ 1582 if (rowner == cowner) dnnz[i]++; 1583 else onnz[i]++; 1584 } 1585 for (j=bi[i]; j<bi[i+1]; j++) { 1586 PetscInt col = gcdest[bj[j]]; 1587 PetscMPIInt cowner; 1588 PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner)); 1589 if (rowner == cowner) dnnz[i]++; 1590 else onnz[i]++; 1591 } 1592 } 1593 PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE)); 1594 PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE)); 1595 PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE)); 1596 PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE)); 1597 PetscCall(PetscSFDestroy(&rowsf)); 1598 1599 
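/* At this point tdnnz[]/tonnz[] hold, for each row owned after the permutation, the counts of
   entries falling in the diagonal and off-diagonal blocks; they drive the preallocation of
   Aperm below. A minimal caller-side sketch, assuming parallel permutation index sets rowp
   and colp built by the caller:

     Mat Aperm;
     PetscCall(MatPermute(A,rowp,colp,&Aperm));
     PetscCall(MatDestroy(&Aperm));
*/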
PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm)); 1600 PetscCall(MatSeqAIJGetArray(aA,&aa)); 1601 PetscCall(MatSeqAIJGetArray(aB,&ba)); 1602 for (i=0; i<m; i++) { 1603 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1604 PetscInt j0,rowlen; 1605 rowlen = ai[i+1] - ai[i]; 1606 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1607 for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1608 PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES)); 1609 } 1610 rowlen = bi[i+1] - bi[i]; 1611 for (j0=j=0; j<rowlen; j0=j) { 1612 for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1613 PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES)); 1614 } 1615 } 1616 PetscCall(MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY)); 1617 PetscCall(MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY)); 1618 PetscCall(MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done)); 1619 PetscCall(MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done)); 1620 PetscCall(MatSeqAIJRestoreArray(aA,&aa)); 1621 PetscCall(MatSeqAIJRestoreArray(aB,&ba)); 1622 PetscCall(PetscFree4(dnnz,onnz,tdnnz,tonnz)); 1623 PetscCall(PetscFree3(work,rdest,cdest)); 1624 PetscCall(PetscFree(gcdest)); 1625 if (parcolp) PetscCall(ISDestroy(&colp)); 1626 *B = Aperm; 1627 PetscFunctionReturn(0); 1628 } 1629 1630 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1631 { 1632 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1633 1634 PetscFunctionBegin; 1635 PetscCall(MatGetSize(aij->B,NULL,nghosts)); 1636 if (ghosts) *ghosts = aij->garray; 1637 PetscFunctionReturn(0); 1638 } 1639 1640 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1641 { 1642 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1643 Mat A = mat->A,B = mat->B; 1644 PetscLogDouble isend[5],irecv[5]; 1645 1646 PetscFunctionBegin; 1647 info->block_size = 1.0; 1648 PetscCall(MatGetInfo(A,MAT_LOCAL,info)); 1649 1650 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1651 isend[3] = info->memory; isend[4] = info->mallocs; 1652 1653 PetscCall(MatGetInfo(B,MAT_LOCAL,info)); 1654 1655 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1656 isend[3] += info->memory; isend[4] += info->mallocs; 1657 if (flag == MAT_LOCAL) { 1658 info->nz_used = isend[0]; 1659 info->nz_allocated = isend[1]; 1660 info->nz_unneeded = isend[2]; 1661 info->memory = isend[3]; 1662 info->mallocs = isend[4]; 1663 } else if (flag == MAT_GLOBAL_MAX) { 1664 PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin))); 1665 1666 info->nz_used = irecv[0]; 1667 info->nz_allocated = irecv[1]; 1668 info->nz_unneeded = irecv[2]; 1669 info->memory = irecv[3]; 1670 info->mallocs = irecv[4]; 1671 } else if (flag == MAT_GLOBAL_SUM) { 1672 PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin))); 1673 1674 info->nz_used = irecv[0]; 1675 info->nz_allocated = irecv[1]; 1676 info->nz_unneeded = irecv[2]; 1677 info->memory = irecv[3]; 1678 info->mallocs = irecv[4]; 1679 } 1680 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1681 info->fill_ratio_needed = 0; 1682 info->factor_mallocs = 0; 1683 PetscFunctionReturn(0); 1684 } 1685 1686 PetscErrorCode MatSetOption_MPIAIJ(Mat 
A,MatOption op,PetscBool flg) 1687 { 1688 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1689 1690 PetscFunctionBegin; 1691 switch (op) { 1692 case MAT_NEW_NONZERO_LOCATIONS: 1693 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1694 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1695 case MAT_KEEP_NONZERO_PATTERN: 1696 case MAT_NEW_NONZERO_LOCATION_ERR: 1697 case MAT_USE_INODES: 1698 case MAT_IGNORE_ZERO_ENTRIES: 1699 case MAT_FORM_EXPLICIT_TRANSPOSE: 1700 MatCheckPreallocated(A,1); 1701 PetscCall(MatSetOption(a->A,op,flg)); 1702 PetscCall(MatSetOption(a->B,op,flg)); 1703 break; 1704 case MAT_ROW_ORIENTED: 1705 MatCheckPreallocated(A,1); 1706 a->roworiented = flg; 1707 1708 PetscCall(MatSetOption(a->A,op,flg)); 1709 PetscCall(MatSetOption(a->B,op,flg)); 1710 break; 1711 case MAT_FORCE_DIAGONAL_ENTRIES: 1712 case MAT_SORTED_FULL: 1713 PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op])); 1714 break; 1715 case MAT_IGNORE_OFF_PROC_ENTRIES: 1716 a->donotstash = flg; 1717 break; 1718 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1719 case MAT_SPD: 1720 case MAT_SYMMETRIC: 1721 case MAT_STRUCTURALLY_SYMMETRIC: 1722 case MAT_HERMITIAN: 1723 case MAT_SYMMETRY_ETERNAL: 1724 break; 1725 case MAT_SUBMAT_SINGLEIS: 1726 A->submat_singleis = flg; 1727 break; 1728 case MAT_STRUCTURE_ONLY: 1729 /* The option is handled directly by MatSetOption() */ 1730 break; 1731 default: 1732 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1733 } 1734 PetscFunctionReturn(0); 1735 } 1736 1737 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1738 { 1739 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1740 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1741 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1742 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1743 PetscInt *cmap,*idx_p; 1744 1745 PetscFunctionBegin; 1746 PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1747 mat->getrowactive = PETSC_TRUE; 1748 1749 if (!mat->rowvalues && (idx || v)) { 1750 /* 1751 allocate enough space to hold information from the longest row. 
1752 */ 1753 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1754 PetscInt max = 1,tmp; 1755 for (i=0; i<matin->rmap->n; i++) { 1756 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1757 if (max < tmp) max = tmp; 1758 } 1759 PetscCall(PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices)); 1760 } 1761 1762 PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1763 lrow = row - rstart; 1764 1765 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1766 if (!v) {pvA = NULL; pvB = NULL;} 1767 if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1768 PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA)); 1769 PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB)); 1770 nztot = nzA + nzB; 1771 1772 cmap = mat->garray; 1773 if (v || idx) { 1774 if (nztot) { 1775 /* Sort by increasing column numbers, assuming A and B already sorted */ 1776 PetscInt imark = -1; 1777 if (v) { 1778 *v = v_p = mat->rowvalues; 1779 for (i=0; i<nzB; i++) { 1780 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1781 else break; 1782 } 1783 imark = i; 1784 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1785 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1786 } 1787 if (idx) { 1788 *idx = idx_p = mat->rowindices; 1789 if (imark > -1) { 1790 for (i=0; i<imark; i++) { 1791 idx_p[i] = cmap[cworkB[i]]; 1792 } 1793 } else { 1794 for (i=0; i<nzB; i++) { 1795 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1796 else break; 1797 } 1798 imark = i; 1799 } 1800 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1801 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1802 } 1803 } else { 1804 if (idx) *idx = NULL; 1805 if (v) *v = NULL; 1806 } 1807 } 1808 *nz = nztot; 1809 PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA)); 1810 PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB)); 1811 PetscFunctionReturn(0); 1812 } 1813 1814 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1815 { 1816 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1817 1818 PetscFunctionBegin; 1819 PetscCheck(aij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1820 aij->getrowactive = PETSC_FALSE; 1821 PetscFunctionReturn(0); 1822 } 1823 1824 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1825 { 1826 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1827 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1828 PetscInt i,j,cstart = mat->cmap->rstart; 1829 PetscReal sum = 0.0; 1830 const MatScalar *v,*amata,*bmata; 1831 1832 PetscFunctionBegin; 1833 if (aij->size == 1) { 1834 PetscCall(MatNorm(aij->A,type,norm)); 1835 } else { 1836 PetscCall(MatSeqAIJGetArrayRead(aij->A,&amata)); 1837 PetscCall(MatSeqAIJGetArrayRead(aij->B,&bmata)); 1838 if (type == NORM_FROBENIUS) { 1839 v = amata; 1840 for (i=0; i<amat->nz; i++) { 1841 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1842 } 1843 v = bmata; 1844 for (i=0; i<bmat->nz; i++) { 1845 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1846 } 1847 PetscCall(MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat))); 1848 *norm = PetscSqrtReal(*norm); 1849 PetscCall(PetscLogFlops(2.0*amat->nz+2.0*bmat->nz)); 1850 } else if (type == NORM_1) { /* max column norm */ 1851 PetscReal *tmp,*tmp2; 1852 PetscInt *jj,*garray = aij->garray; 1853 PetscCall(PetscCalloc1(mat->cmap->N+1,&tmp)); 1854 PetscCall(PetscMalloc1(mat->cmap->N+1,&tmp2)); 1855 
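/* NORM_1 is the maximum absolute column sum, ||A||_1 = max_j (sum_i |a_ij|): tmp[] accumulates
   every stored entry into its global column's sum (diagonal-block entries at cstart + column,
   off-diagonal entries mapped through garray[]), an allreduce completes the per-column sums,
   and the maximum is taken below. */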
*norm = 0.0; 1856 v = amata; jj = amat->j; 1857 for (j=0; j<amat->nz; j++) { 1858 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1859 } 1860 v = bmata; jj = bmat->j; 1861 for (j=0; j<bmat->nz; j++) { 1862 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1863 } 1864 PetscCall(MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat))); 1865 for (j=0; j<mat->cmap->N; j++) { 1866 if (tmp2[j] > *norm) *norm = tmp2[j]; 1867 } 1868 PetscCall(PetscFree(tmp)); 1869 PetscCall(PetscFree(tmp2)); 1870 PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0))); 1871 } else if (type == NORM_INFINITY) { /* max row norm */ 1872 PetscReal ntemp = 0.0; 1873 for (j=0; j<aij->A->rmap->n; j++) { 1874 v = amata + amat->i[j]; 1875 sum = 0.0; 1876 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1877 sum += PetscAbsScalar(*v); v++; 1878 } 1879 v = bmata + bmat->i[j]; 1880 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1881 sum += PetscAbsScalar(*v); v++; 1882 } 1883 if (sum > ntemp) ntemp = sum; 1884 } 1885 PetscCall(MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat))); 1886 PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0))); 1887 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1888 PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&amata)); 1889 PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&bmata)); 1890 } 1891 PetscFunctionReturn(0); 1892 } 1893 1894 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1895 { 1896 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 1897 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 1898 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 1899 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 1900 Mat B,A_diag,*B_diag; 1901 const MatScalar *pbv,*bv; 1902 1903 PetscFunctionBegin; 1904 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1905 ai = Aloc->i; aj = Aloc->j; 1906 bi = Bloc->i; bj = Bloc->j; 1907 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1908 PetscInt *d_nnz,*g_nnz,*o_nnz; 1909 PetscSFNode *oloc; 1910 PETSC_UNUSED PetscSF sf; 1911 1912 PetscCall(PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc)); 1913 /* compute d_nnz for preallocation */ 1914 PetscCall(PetscArrayzero(d_nnz,na)); 1915 for (i=0; i<ai[ma]; i++) d_nnz[aj[i]]++; 1916 /* compute local off-diagonal contributions */ 1917 PetscCall(PetscArrayzero(g_nnz,nb)); 1918 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1919 /* map those to global */ 1920 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf)); 1921 PetscCall(PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray)); 1922 PetscCall(PetscSFSetFromOptions(sf)); 1923 PetscCall(PetscArrayzero(o_nnz,na)); 1924 PetscCall(PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM)); 1925 PetscCall(PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM)); 1926 PetscCall(PetscSFDestroy(&sf)); 1927 1928 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 1929 PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M)); 1930 PetscCall(MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs))); 1931 PetscCall(MatSetType(B,((PetscObject)A)->type_name)); 1932 PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz)); 1933 PetscCall(PetscFree4(d_nnz,o_nnz,g_nnz,oloc)); 1934 } else { 1935 B = *matout; 1936 PetscCall(MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 1937 } 1938 1939 b = (Mat_MPIAIJ*)B->data; 1940 A_diag = a->A; 1941 B_diag = &b->A; 
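/* The diagonal block of the transpose is again a purely local block, so it is transposed below
   without communication; only the off-diagonal entries must be shipped to their new owning
   processes through MatSetValues(). */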
1942 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 1943 A_diag_ncol = A_diag->cmap->N; 1944 B_diag_ilen = sub_B_diag->ilen; 1945 B_diag_i = sub_B_diag->i; 1946 1947 /* Set ilen for diagonal of B */ 1948 for (i=0; i<A_diag_ncol; i++) { 1949 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 1950 } 1951 1952 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 1953 very quickly (=without using MatSetValues), because all writes are local. */ 1954 PetscCall(MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag)); 1955 1956 /* copy over the B part */ 1957 PetscCall(PetscMalloc1(bi[mb],&cols)); 1958 PetscCall(MatSeqAIJGetArrayRead(a->B,&bv)); 1959 pbv = bv; 1960 row = A->rmap->rstart; 1961 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1962 cols_tmp = cols; 1963 for (i=0; i<mb; i++) { 1964 ncol = bi[i+1]-bi[i]; 1965 PetscCall(MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES)); 1966 row++; 1967 pbv += ncol; cols_tmp += ncol; 1968 } 1969 PetscCall(PetscFree(cols)); 1970 PetscCall(MatSeqAIJRestoreArrayRead(a->B,&bv)); 1971 1972 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 1973 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 1974 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1975 *matout = B; 1976 } else { 1977 PetscCall(MatHeaderMerge(A,&B)); 1978 } 1979 PetscFunctionReturn(0); 1980 } 1981 1982 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1983 { 1984 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1985 Mat a = aij->A,b = aij->B; 1986 PetscInt s1,s2,s3; 1987 1988 PetscFunctionBegin; 1989 PetscCall(MatGetLocalSize(mat,&s2,&s3)); 1990 if (rr) { 1991 PetscCall(VecGetLocalSize(rr,&s1)); 1992 PetscCheck(s1==s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1993 /* Overlap communication with computation. */ 1994 PetscCall(VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1995 } 1996 if (ll) { 1997 PetscCall(VecGetLocalSize(ll,&s1)); 1998 PetscCheck(s1==s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1999 PetscCall((*b->ops->diagonalscale)(b,ll,NULL)); 2000 } 2001 /* scale the diagonal block */ 2002 PetscCall((*a->ops->diagonalscale)(a,ll,rr)); 2003 2004 if (rr) { 2005 /* Do a scatter end and then right scale the off-diagonal block */ 2006 PetscCall(VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2007 PetscCall((*b->ops->diagonalscale)(b,NULL,aij->lvec)); 2008 } 2009 PetscFunctionReturn(0); 2010 } 2011 2012 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2013 { 2014 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2015 2016 PetscFunctionBegin; 2017 PetscCall(MatSetUnfactored(a->A)); 2018 PetscFunctionReturn(0); 2019 } 2020 2021 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2022 { 2023 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2024 Mat a,b,c,d; 2025 PetscBool flg; 2026 2027 PetscFunctionBegin; 2028 a = matA->A; b = matA->B; 2029 c = matB->A; d = matB->B; 2030 2031 PetscCall(MatEqual(a,c,&flg)); 2032 if (flg) { 2033 PetscCall(MatEqual(b,d,&flg)); 2034 } 2035 PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A))); 2036 PetscFunctionReturn(0); 2037 } 2038 2039 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2040 { 2041 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2042 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2043 2044 PetscFunctionBegin; 2045 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 2046 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2047 /* because of the column compression in the off-processor part of the matrix a->B, 2048 the number of columns in a->B and b->B may be different, hence we cannot call 2049 the MatCopy() directly on the two parts. If need be, we can provide a more 2050 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2051 then copying the submatrices */ 2052 PetscCall(MatCopy_Basic(A,B,str)); 2053 } else { 2054 PetscCall(MatCopy(a->A,b->A,str)); 2055 PetscCall(MatCopy(a->B,b->B,str)); 2056 } 2057 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2058 PetscFunctionReturn(0); 2059 } 2060 2061 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2062 { 2063 PetscFunctionBegin; 2064 PetscCall(MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL)); 2065 PetscFunctionReturn(0); 2066 } 2067 2068 /* 2069 Computes the number of nonzeros per row needed for preallocation when X and Y 2070 have different nonzero structure. 2071 */ 2072 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2073 { 2074 PetscInt i,j,k,nzx,nzy; 2075 2076 PetscFunctionBegin; 2077 /* Set the number of nonzeros in the new matrix */ 2078 for (i=0; i<m; i++) { 2079 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2080 nzx = xi[i+1] - xi[i]; 2081 nzy = yi[i+1] - yi[i]; 2082 nnz[i] = 0; 2083 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2084 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2085 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2086 nnz[i]++; 2087 } 2088 for (; k<nzy; k++) nnz[i]++; 2089 } 2090 PetscFunctionReturn(0); 2091 } 2092 2093 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2094 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2095 { 2096 PetscInt m = Y->rmap->N; 2097 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2098 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2099 2100 PetscFunctionBegin; 2101 PetscCall(MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz)); 2102 PetscFunctionReturn(0); 2103 } 2104 2105 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2106 { 2107 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2108 2109 PetscFunctionBegin; 2110 if (str == SAME_NONZERO_PATTERN) { 2111 PetscCall(MatAXPY(yy->A,a,xx->A,str)); 2112 PetscCall(MatAXPY(yy->B,a,xx->B,str)); 2113 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2114 PetscCall(MatAXPY_Basic(Y,a,X,str)); 2115 } else { 2116 Mat B; 2117 PetscInt *nnz_d,*nnz_o; 2118 2119 PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d)); 2120 PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o)); 2121 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B)); 2122 PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name)); 2123 PetscCall(MatSetLayouts(B,Y->rmap,Y->cmap)); 2124 PetscCall(MatSetType(B,((PetscObject)Y)->type_name)); 2125 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d)); 2126 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o)); 2127 PetscCall(MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o)); 2128 PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str)); 2129 PetscCall(MatHeaderMerge(Y,&B)); 2130 
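/* MatHeaderMerge() above replaced the internals of Y with those of the freshly preallocated B,
   so the caller's handle is preserved while Y now carries the union of the two nonzero patterns.
   A typical call for matrices with unrelated patterns, as a sketch (alpha is any PetscScalar):

     PetscCall(MatAXPY(Y,alpha,X,DIFFERENT_NONZERO_PATTERN));   now Y = Y + alpha*X
*/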
PetscCall(PetscFree(nnz_d)); 2131 PetscCall(PetscFree(nnz_o)); 2132 } 2133 PetscFunctionReturn(0); 2134 } 2135 2136 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2137 2138 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2139 { 2140 PetscFunctionBegin; 2141 if (PetscDefined(USE_COMPLEX)) { 2142 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2143 2144 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2145 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2146 } 2147 PetscFunctionReturn(0); 2148 } 2149 2150 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2151 { 2152 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2153 2154 PetscFunctionBegin; 2155 PetscCall(MatRealPart(a->A)); 2156 PetscCall(MatRealPart(a->B)); 2157 PetscFunctionReturn(0); 2158 } 2159 2160 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2161 { 2162 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2163 2164 PetscFunctionBegin; 2165 PetscCall(MatImaginaryPart(a->A)); 2166 PetscCall(MatImaginaryPart(a->B)); 2167 PetscFunctionReturn(0); 2168 } 2169 2170 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2171 { 2172 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2173 PetscInt i,*idxb = NULL,m = A->rmap->n; 2174 PetscScalar *va,*vv; 2175 Vec vB,vA; 2176 const PetscScalar *vb; 2177 2178 PetscFunctionBegin; 2179 PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vA)); 2180 PetscCall(MatGetRowMaxAbs(a->A,vA,idx)); 2181 2182 PetscCall(VecGetArrayWrite(vA,&va)); 2183 if (idx) { 2184 for (i=0; i<m; i++) { 2185 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2186 } 2187 } 2188 2189 PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vB)); 2190 PetscCall(PetscMalloc1(m,&idxb)); 2191 PetscCall(MatGetRowMaxAbs(a->B,vB,idxb)); 2192 2193 PetscCall(VecGetArrayWrite(v,&vv)); 2194 PetscCall(VecGetArrayRead(vB,&vb)); 2195 for (i=0; i<m; i++) { 2196 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2197 vv[i] = vb[i]; 2198 if (idx) idx[i] = a->garray[idxb[i]]; 2199 } else { 2200 vv[i] = va[i]; 2201 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) 2202 idx[i] = a->garray[idxb[i]]; 2203 } 2204 } 2205 PetscCall(VecRestoreArrayWrite(v,&vv)); 2206 PetscCall(VecRestoreArrayWrite(vA,&va)); 2207 PetscCall(VecRestoreArrayRead(vB,&vb)); 2208 PetscCall(PetscFree(idxb)); 2209 PetscCall(VecDestroy(&vA)); 2210 PetscCall(VecDestroy(&vB)); 2211 PetscFunctionReturn(0); 2212 } 2213 2214 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2215 { 2216 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2217 PetscInt m = A->rmap->n,n = A->cmap->n; 2218 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2219 PetscInt *cmap = mat->garray; 2220 PetscInt *diagIdx, *offdiagIdx; 2221 Vec diagV, offdiagV; 2222 PetscScalar *a, *diagA, *offdiagA; 2223 const PetscScalar *ba,*bav; 2224 PetscInt r,j,col,ncols,*bi,*bj; 2225 Mat B = mat->B; 2226 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2227 2228 PetscFunctionBegin; 2229 /* When a process holds entire A and other processes have no entry */ 2230 if (A->cmap->N == n) { 2231 PetscCall(VecGetArrayWrite(v,&diagA)); 2232 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV)); 2233 PetscCall(MatGetRowMinAbs(mat->A,diagV,idx)); 2234 PetscCall(VecDestroy(&diagV)); 2235 PetscCall(VecRestoreArrayWrite(v,&diagA)); 2236 PetscFunctionReturn(0); 2237 } else if (n == 0) { 2238 if (m) { 2239 PetscCall(VecGetArrayWrite(v,&a)); 2240 for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;} 2241 PetscCall(VecRestoreArrayWrite(v,&a)); 2242 } 2243 PetscFunctionReturn(0); 2244 } 2245 2246
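/* General case: the row-wise minima of the diagonal block are gathered into diagV below, while
   the off-diagonal block must also account for implicit zeros (columns compressed out of B), so
   each B row is first scanned for the first "hole" in cmap[] before its stored values are compared. */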
PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx)); 2247 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2248 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2249 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2250 2251 /* Get offdiagIdx[] for implicit 0.0 */ 2252 PetscCall(MatSeqAIJGetArrayRead(B,&bav)); 2253 ba = bav; 2254 bi = b->i; 2255 bj = b->j; 2256 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2257 for (r = 0; r < m; r++) { 2258 ncols = bi[r+1] - bi[r]; 2259 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2260 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2261 } else { /* Brow is sparse so we already KNOW the minimum in absolute value is 0.0 */ 2262 offdiagA[r] = 0.0; 2263 2264 /* Find first hole in the cmap */ 2265 for (j=0; j<ncols; j++) { 2266 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2267 if (col > j && j < cstart) { 2268 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2269 break; 2270 } else if (col > j + n && j >= cstart) { 2271 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2272 break; 2273 } 2274 } 2275 if (j == ncols && ncols < A->cmap->N - n) { 2276 /* a hole is outside compressed Bcols */ 2277 if (ncols == 0) { 2278 if (cstart) { 2279 offdiagIdx[r] = 0; 2280 } else offdiagIdx[r] = cend; 2281 } else { /* ncols > 0 */ 2282 offdiagIdx[r] = cmap[ncols-1] + 1; 2283 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2284 } 2285 } 2286 } 2287 2288 for (j=0; j<ncols; j++) { 2289 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2290 ba++; bj++; 2291 } 2292 } 2293 2294 PetscCall(VecGetArrayWrite(v, &a)); 2295 PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA)); 2296 for (r = 0; r < m; ++r) { 2297 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2298 a[r] = diagA[r]; 2299 if (idx) idx[r] = cstart + diagIdx[r]; 2300 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2301 a[r] = diagA[r]; 2302 if (idx) { 2303 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2304 idx[r] = cstart + diagIdx[r]; 2305 } else idx[r] = offdiagIdx[r]; 2306 } 2307 } else { 2308 a[r] = offdiagA[r]; 2309 if (idx) idx[r] = offdiagIdx[r]; 2310 } 2311 } 2312 PetscCall(MatSeqAIJRestoreArrayRead(B,&bav)); 2313 PetscCall(VecRestoreArrayWrite(v, &a)); 2314 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA)); 2315 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2316 PetscCall(VecDestroy(&diagV)); 2317 PetscCall(VecDestroy(&offdiagV)); 2318 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2319 PetscFunctionReturn(0); 2320 } 2321 2322 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2323 { 2324 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2325 PetscInt m = A->rmap->n,n = A->cmap->n; 2326 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2327 PetscInt *cmap = mat->garray; 2328 PetscInt *diagIdx, *offdiagIdx; 2329 Vec diagV, offdiagV; 2330 PetscScalar *a, *diagA, *offdiagA; 2331 const PetscScalar *ba,*bav; 2332 PetscInt r,j,col,ncols,*bi,*bj; 2333 Mat B = mat->B; 2334 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2335 2336 PetscFunctionBegin; 2337 /* When a process holds entire A and other processes have no entry */ 2338 if (A->cmap->N == n) { 2339 PetscCall(VecGetArrayWrite(v,&diagA)); 2340 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV)); 2341 PetscCall(MatGetRowMin(mat->A,diagV,idx)); 2342 PetscCall(VecDestroy(&diagV)); 2343 PetscCall(VecRestoreArrayWrite(v,&diagA)); 2344 PetscFunctionReturn(0); 2345 } else if
(n == 0) { 2346 if (m) { 2347 PetscCall(VecGetArrayWrite(v,&a)); 2348 for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;} 2349 PetscCall(VecRestoreArrayWrite(v,&a)); 2350 } 2351 PetscFunctionReturn(0); 2352 } 2353 2354 PetscCall(PetscCalloc2(m,&diagIdx,m,&offdiagIdx)); 2355 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2356 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2357 PetscCall(MatGetRowMin(mat->A, diagV, diagIdx)); 2358 2359 /* Get offdiagIdx[] for implicit 0.0 */ 2360 PetscCall(MatSeqAIJGetArrayRead(B,&bav)); 2361 ba = bav; 2362 bi = b->i; 2363 bj = b->j; 2364 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2365 for (r = 0; r < m; r++) { 2366 ncols = bi[r+1] - bi[r]; 2367 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2368 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2369 } else { /* Brow is sparse so we already KNOW the minimum is 0.0 or lower */ 2370 offdiagA[r] = 0.0; 2371 2372 /* Find first hole in the cmap */ 2373 for (j=0; j<ncols; j++) { 2374 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2375 if (col > j && j < cstart) { 2376 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2377 break; 2378 } else if (col > j + n && j >= cstart) { 2379 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2380 break; 2381 } 2382 } 2383 if (j == ncols && ncols < A->cmap->N - n) { 2384 /* a hole is outside compressed Bcols */ 2385 if (ncols == 0) { 2386 if (cstart) { 2387 offdiagIdx[r] = 0; 2388 } else offdiagIdx[r] = cend; 2389 } else { /* ncols > 0 */ 2390 offdiagIdx[r] = cmap[ncols-1] + 1; 2391 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2392 } 2393 } 2394 } 2395 2396 for (j=0; j<ncols; j++) { 2397 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2398 ba++; bj++; 2399 } 2400 } 2401 2402 PetscCall(VecGetArrayWrite(v, &a)); 2403 PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA)); 2404 for (r = 0; r < m; ++r) { 2405 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2406 a[r] = diagA[r]; 2407 if (idx) idx[r] = cstart + diagIdx[r]; 2408 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2409 a[r] = diagA[r]; 2410 if (idx) { 2411 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2412 idx[r] = cstart + diagIdx[r]; 2413 } else idx[r] = offdiagIdx[r]; 2414 } 2415 } else { 2416 a[r] = offdiagA[r]; 2417 if (idx) idx[r] = offdiagIdx[r]; 2418 } 2419 } 2420 PetscCall(MatSeqAIJRestoreArrayRead(B,&bav)); 2421 PetscCall(VecRestoreArrayWrite(v, &a)); 2422 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA)); 2423 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2424 PetscCall(VecDestroy(&diagV)); 2425 PetscCall(VecDestroy(&offdiagV)); 2426 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2427 PetscFunctionReturn(0); 2428 } 2429 2430 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2431 { 2432 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 2433 PetscInt m = A->rmap->n,n = A->cmap->n; 2434 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2435 PetscInt *cmap = mat->garray; 2436 PetscInt *diagIdx, *offdiagIdx; 2437 Vec diagV, offdiagV; 2438 PetscScalar *a, *diagA, *offdiagA; 2439 const PetscScalar *ba,*bav; 2440 PetscInt r,j,col,ncols,*bi,*bj; 2441 Mat B = mat->B; 2442 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2443 2444 PetscFunctionBegin; 2445 /* When a process holds entire A and other processes have no entry */ 2446 if (A->cmap->N == n) { 2447 PetscCall(VecGetArrayWrite(v,&diagA)); 2448
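/* Zero-copy path: wrap the output array of v in a sequential Vec so that MatGetRowMax() on the
   (here complete) diagonal block writes its result directly into v. */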
PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV)); 2449 PetscCall(MatGetRowMax(mat->A,diagV,idx)); 2450 PetscCall(VecDestroy(&diagV)); 2451 PetscCall(VecRestoreArrayWrite(v,&diagA)); 2452 PetscFunctionReturn(0); 2453 } else if (n == 0) { 2454 if (m) { 2455 PetscCall(VecGetArrayWrite(v,&a)); 2456 for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;} 2457 PetscCall(VecRestoreArrayWrite(v,&a)); 2458 } 2459 PetscFunctionReturn(0); 2460 } 2461 2462 PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx)); 2463 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2464 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2465 PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); 2466 2467 /* Get offdiagIdx[] for implicit 0.0 */ 2468 PetscCall(MatSeqAIJGetArrayRead(B,&bav)); 2469 ba = bav; 2470 bi = b->i; 2471 bj = b->j; 2472 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2473 for (r = 0; r < m; r++) { 2474 ncols = bi[r+1] - bi[r]; 2475 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2476 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2477 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2478 offdiagA[r] = 0.0; 2479 2480 /* Find first hole in the cmap */ 2481 for (j=0; j<ncols; j++) { 2482 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2483 if (col > j && j < cstart) { 2484 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2485 break; 2486 } else if (col > j + n && j >= cstart) { 2487 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2488 break; 2489 } 2490 } 2491 if (j == ncols && ncols < A->cmap->N - n) { 2492 /* a hole is outside compressed Bcols */ 2493 if (ncols == 0) { 2494 if (cstart) { 2495 offdiagIdx[r] = 0; 2496 } else offdiagIdx[r] = cend; 2497 } else { /* ncols > 0 */ 2498 offdiagIdx[r] = cmap[ncols-1] + 1; 2499 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2500 } 2501 } 2502 } 2503 2504 for (j=0; j<ncols; j++) { 2505 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2506 ba++; bj++; 2507 } 2508 } 2509 2510 PetscCall(VecGetArrayWrite(v, &a)); 2511 PetscCall(VecGetArrayRead(diagV,(const PetscScalar**)&diagA)); 2512 for (r = 0; r < m; ++r) { 2513 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2514 a[r] = diagA[r]; 2515 if (idx) idx[r] = cstart + diagIdx[r]; 2516 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2517 a[r] = diagA[r]; 2518 if (idx) { 2519 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2520 idx[r] = cstart + diagIdx[r]; 2521 } else idx[r] = offdiagIdx[r]; 2522 } 2523 } else { 2524 a[r] = offdiagA[r]; 2525 if (idx) idx[r] = offdiagIdx[r]; 2526 } 2527 } 2528 PetscCall(MatSeqAIJRestoreArrayRead(B,&bav)); 2529 PetscCall(VecRestoreArrayWrite(v, &a)); 2530 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA)); 2531 PetscCall(VecRestoreArrayWrite(offdiagV,&offdiagA)); 2532 PetscCall(VecDestroy(&diagV)); 2533 PetscCall(VecDestroy(&offdiagV)); 2534 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2535 PetscFunctionReturn(0); 2536 } 2537 2538 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2539 { 2540 Mat *dummy; 2541 2542 PetscFunctionBegin; 2543 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy)); 2544 *newmat = *dummy; 2545 PetscCall(PetscFree(dummy)); 2546 PetscFunctionReturn(0); 2547 } 2548 2549 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2550 { 2551 Mat_MPIAIJ *a = (Mat_MPIAIJ*) 
A->data; 2552 2553 PetscFunctionBegin; 2554 PetscCall(MatInvertBlockDiagonal(a->A,values)); 2555 A->factorerrortype = a->A->factorerrortype; 2556 PetscFunctionReturn(0); 2557 } 2558 2559 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2560 { 2561 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2562 2563 PetscFunctionBegin; 2564 PetscCheck(x->assembled || x->preallocated,PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2565 PetscCall(MatSetRandom(aij->A,rctx)); 2566 if (x->assembled) { 2567 PetscCall(MatSetRandom(aij->B,rctx)); 2568 } else { 2569 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx)); 2570 } 2571 PetscCall(MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY)); 2572 PetscCall(MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY)); 2573 PetscFunctionReturn(0); 2574 } 2575 2576 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2577 { 2578 PetscFunctionBegin; 2579 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2580 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2581 PetscFunctionReturn(0); 2582 } 2583 2584 /*@ 2585 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2586 2587 Collective on Mat 2588 2589 Input Parameters: 2590 + A - the matrix 2591 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2592 2593 Level: advanced 2594 2595 @*/ 2596 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2597 { 2598 PetscFunctionBegin; 2599 PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc)); 2600 PetscFunctionReturn(0); 2601 } 2602 2603 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2604 { 2605 PetscBool sc = PETSC_FALSE,flg; 2606 2607 PetscFunctionBegin; 2608 PetscOptionsHeadBegin(PetscOptionsObject,"MPIAIJ options"); 2609 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2610 PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg)); 2611 if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A,sc)); 2612 PetscOptionsHeadEnd(); 2613 PetscFunctionReturn(0); 2614 } 2615 2616 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2617 { 2618 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2619 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2620 2621 PetscFunctionBegin; 2622 if (!Y->preallocated) { 2623 PetscCall(MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL)); 2624 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. 
*/ 2625 PetscInt nonew = aij->nonew; 2626 PetscCall(MatSeqAIJSetPreallocation(maij->A,1,NULL)); 2627 aij->nonew = nonew; 2628 } 2629 PetscCall(MatShift_Basic(Y,a)); 2630 PetscFunctionReturn(0); 2631 } 2632 2633 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2634 { 2635 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2636 2637 PetscFunctionBegin; 2638 PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2639 PetscCall(MatMissingDiagonal(a->A,missing,d)); 2640 if (d) { 2641 PetscInt rstart; 2642 PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 2643 *d += rstart; 2644 2645 } 2646 PetscFunctionReturn(0); 2647 } 2648 2649 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2650 { 2651 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2652 2653 PetscFunctionBegin; 2654 PetscCall(MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag)); 2655 PetscFunctionReturn(0); 2656 } 2657 2658 /* -------------------------------------------------------------------*/ 2659 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2660 MatGetRow_MPIAIJ, 2661 MatRestoreRow_MPIAIJ, 2662 MatMult_MPIAIJ, 2663 /* 4*/ MatMultAdd_MPIAIJ, 2664 MatMultTranspose_MPIAIJ, 2665 MatMultTransposeAdd_MPIAIJ, 2666 NULL, 2667 NULL, 2668 NULL, 2669 /*10*/ NULL, 2670 NULL, 2671 NULL, 2672 MatSOR_MPIAIJ, 2673 MatTranspose_MPIAIJ, 2674 /*15*/ MatGetInfo_MPIAIJ, 2675 MatEqual_MPIAIJ, 2676 MatGetDiagonal_MPIAIJ, 2677 MatDiagonalScale_MPIAIJ, 2678 MatNorm_MPIAIJ, 2679 /*20*/ MatAssemblyBegin_MPIAIJ, 2680 MatAssemblyEnd_MPIAIJ, 2681 MatSetOption_MPIAIJ, 2682 MatZeroEntries_MPIAIJ, 2683 /*24*/ MatZeroRows_MPIAIJ, 2684 NULL, 2685 NULL, 2686 NULL, 2687 NULL, 2688 /*29*/ MatSetUp_MPIAIJ, 2689 NULL, 2690 NULL, 2691 MatGetDiagonalBlock_MPIAIJ, 2692 NULL, 2693 /*34*/ MatDuplicate_MPIAIJ, 2694 NULL, 2695 NULL, 2696 NULL, 2697 NULL, 2698 /*39*/ MatAXPY_MPIAIJ, 2699 MatCreateSubMatrices_MPIAIJ, 2700 MatIncreaseOverlap_MPIAIJ, 2701 MatGetValues_MPIAIJ, 2702 MatCopy_MPIAIJ, 2703 /*44*/ MatGetRowMax_MPIAIJ, 2704 MatScale_MPIAIJ, 2705 MatShift_MPIAIJ, 2706 MatDiagonalSet_MPIAIJ, 2707 MatZeroRowsColumns_MPIAIJ, 2708 /*49*/ MatSetRandom_MPIAIJ, 2709 MatGetRowIJ_MPIAIJ, 2710 MatRestoreRowIJ_MPIAIJ, 2711 NULL, 2712 NULL, 2713 /*54*/ MatFDColoringCreate_MPIXAIJ, 2714 NULL, 2715 MatSetUnfactored_MPIAIJ, 2716 MatPermute_MPIAIJ, 2717 NULL, 2718 /*59*/ MatCreateSubMatrix_MPIAIJ, 2719 MatDestroy_MPIAIJ, 2720 MatView_MPIAIJ, 2721 NULL, 2722 NULL, 2723 /*64*/ NULL, 2724 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2725 NULL, 2726 NULL, 2727 NULL, 2728 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2729 MatGetRowMinAbs_MPIAIJ, 2730 NULL, 2731 NULL, 2732 NULL, 2733 NULL, 2734 /*75*/ MatFDColoringApply_AIJ, 2735 MatSetFromOptions_MPIAIJ, 2736 NULL, 2737 NULL, 2738 MatFindZeroDiagonals_MPIAIJ, 2739 /*80*/ NULL, 2740 NULL, 2741 NULL, 2742 /*83*/ MatLoad_MPIAIJ, 2743 MatIsSymmetric_MPIAIJ, 2744 NULL, 2745 NULL, 2746 NULL, 2747 NULL, 2748 /*89*/ NULL, 2749 NULL, 2750 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2751 NULL, 2752 NULL, 2753 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2754 NULL, 2755 NULL, 2756 NULL, 2757 MatBindToCPU_MPIAIJ, 2758 /*99*/ MatProductSetFromOptions_MPIAIJ, 2759 NULL, 2760 NULL, 2761 MatConjugate_MPIAIJ, 2762 NULL, 2763 /*104*/MatSetValuesRow_MPIAIJ, 2764 MatRealPart_MPIAIJ, 2765 MatImaginaryPart_MPIAIJ, 2766 NULL, 2767 NULL, 2768 /*109*/NULL, 2769 NULL, 2770 MatGetRowMin_MPIAIJ, 2771 NULL, 2772 MatMissingDiagonal_MPIAIJ, 2773 
/*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2774 NULL, 2775 MatGetGhosts_MPIAIJ, 2776 NULL, 2777 NULL, 2778 /*119*/MatMultDiagonalBlock_MPIAIJ, 2779 NULL, 2780 NULL, 2781 NULL, 2782 MatGetMultiProcBlock_MPIAIJ, 2783 /*124*/MatFindNonzeroRows_MPIAIJ, 2784 MatGetColumnReductions_MPIAIJ, 2785 MatInvertBlockDiagonal_MPIAIJ, 2786 MatInvertVariableBlockDiagonal_MPIAIJ, 2787 MatCreateSubMatricesMPI_MPIAIJ, 2788 /*129*/NULL, 2789 NULL, 2790 NULL, 2791 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2792 NULL, 2793 /*134*/NULL, 2794 NULL, 2795 NULL, 2796 NULL, 2797 NULL, 2798 /*139*/MatSetBlockSizes_MPIAIJ, 2799 NULL, 2800 NULL, 2801 MatFDColoringSetUp_MPIXAIJ, 2802 MatFindOffBlockDiagonalEntries_MPIAIJ, 2803 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2804 /*145*/NULL, 2805 NULL, 2806 NULL, 2807 MatCreateGraph_Simple_AIJ, 2808 MatFilter_AIJ 2809 }; 2810 2811 /* ----------------------------------------------------------------------------------------*/ 2812 2813 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2814 { 2815 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2816 2817 PetscFunctionBegin; 2818 PetscCall(MatStoreValues(aij->A)); 2819 PetscCall(MatStoreValues(aij->B)); 2820 PetscFunctionReturn(0); 2821 } 2822 2823 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2824 { 2825 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2826 2827 PetscFunctionBegin; 2828 PetscCall(MatRetrieveValues(aij->A)); 2829 PetscCall(MatRetrieveValues(aij->B)); 2830 PetscFunctionReturn(0); 2831 } 2832 2833 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2834 { 2835 Mat_MPIAIJ *b; 2836 PetscMPIInt size; 2837 2838 PetscFunctionBegin; 2839 PetscCall(PetscLayoutSetUp(B->rmap)); 2840 PetscCall(PetscLayoutSetUp(B->cmap)); 2841 b = (Mat_MPIAIJ*)B->data; 2842 2843 #if defined(PETSC_USE_CTABLE) 2844 PetscCall(PetscTableDestroy(&b->colmap)); 2845 #else 2846 PetscCall(PetscFree(b->colmap)); 2847 #endif 2848 PetscCall(PetscFree(b->garray)); 2849 PetscCall(VecDestroy(&b->lvec)); 2850 PetscCall(VecScatterDestroy(&b->Mvctx)); 2851 2852 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2853 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size)); 2854 PetscCall(MatDestroy(&b->B)); 2855 PetscCall(MatCreate(PETSC_COMM_SELF,&b->B)); 2856 PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0)); 2857 PetscCall(MatSetBlockSizesFromMats(b->B,B,B)); 2858 PetscCall(MatSetType(b->B,MATSEQAIJ)); 2859 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B)); 2860 2861 if (!B->preallocated) { 2862 PetscCall(MatCreate(PETSC_COMM_SELF,&b->A)); 2863 PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n)); 2864 PetscCall(MatSetBlockSizesFromMats(b->A,B,B)); 2865 PetscCall(MatSetType(b->A,MATSEQAIJ)); 2866 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A)); 2867 } 2868 2869 PetscCall(MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz)); 2870 PetscCall(MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz)); 2871 B->preallocated = PETSC_TRUE; 2872 B->was_assembled = PETSC_FALSE; 2873 B->assembled = PETSC_FALSE; 2874 PetscFunctionReturn(0); 2875 } 2876 2877 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2878 { 2879 Mat_MPIAIJ *b; 2880 2881 PetscFunctionBegin; 2882 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2883 PetscCall(PetscLayoutSetUp(B->rmap)); 2884 PetscCall(PetscLayoutSetUp(B->cmap)); 2885 b = (Mat_MPIAIJ*)B->data; 2886 2887 #if defined(PETSC_USE_CTABLE) 2888 PetscCall(PetscTableDestroy(&b->colmap)); 2889 #else 2890 PetscCall(PetscFree(b->colmap)); 2891 #endif 2892 PetscCall(PetscFree(b->garray)); 2893 PetscCall(VecDestroy(&b->lvec)); 2894 PetscCall(VecScatterDestroy(&b->Mvctx)); 2895 2896 PetscCall(MatResetPreallocation(b->A)); 2897 PetscCall(MatResetPreallocation(b->B)); 2898 B->preallocated = PETSC_TRUE; 2899 B->was_assembled = PETSC_FALSE; 2900 B->assembled = PETSC_FALSE; 2901 PetscFunctionReturn(0); 2902 } 2903 2904 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2905 { 2906 Mat mat; 2907 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2908 2909 PetscFunctionBegin; 2910 *newmat = NULL; 2911 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat)); 2912 PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N)); 2913 PetscCall(MatSetBlockSizesFromMats(mat,matin,matin)); 2914 PetscCall(MatSetType(mat,((PetscObject)matin)->type_name)); 2915 a = (Mat_MPIAIJ*)mat->data; 2916 2917 mat->factortype = matin->factortype; 2918 mat->assembled = matin->assembled; 2919 mat->insertmode = NOT_SET_VALUES; 2920 mat->preallocated = matin->preallocated; 2921 2922 a->size = oldmat->size; 2923 a->rank = oldmat->rank; 2924 a->donotstash = oldmat->donotstash; 2925 a->roworiented = oldmat->roworiented; 2926 a->rowindices = NULL; 2927 a->rowvalues = NULL; 2928 a->getrowactive = PETSC_FALSE; 2929 2930 PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap)); 2931 PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap)); 2932 2933 if (oldmat->colmap) { 2934 #if defined(PETSC_USE_CTABLE) 2935 PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap)); 2936 #else 2937 PetscCall(PetscMalloc1(mat->cmap->N,&a->colmap)); 2938 PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt))); 2939 PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N)); 2940 #endif 2941 } else a->colmap = NULL; 2942 if (oldmat->garray) { 2943 PetscInt len; 2944 len = oldmat->B->cmap->n; 2945 PetscCall(PetscMalloc1(len+1,&a->garray)); 2946 PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt))); 2947 if (len) PetscCall(PetscArraycpy(a->garray,oldmat->garray,len)); 2948 } else a->garray = NULL; 2949 2950 /* It may happen MatDuplicate is called with a non-assembled matrix 2951 In fact, MatDuplicate only requires the matrix to be preallocated 2952 This may happen inside a 
DMCreateMatrix_Shell */ 2953 if (oldmat->lvec) { 2954 PetscCall(VecDuplicate(oldmat->lvec,&a->lvec)); 2955 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec)); 2956 } 2957 if (oldmat->Mvctx) { 2958 PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx)); 2959 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx)); 2960 } 2961 PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A)); 2962 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A)); 2963 PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B)); 2964 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B)); 2965 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist)); 2966 *newmat = mat; 2967 PetscFunctionReturn(0); 2968 } 2969 2970 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2971 { 2972 PetscBool isbinary, ishdf5; 2973 2974 PetscFunctionBegin; 2975 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2976 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2977 /* force binary viewer to load .info file if it has not yet done so */ 2978 PetscCall(PetscViewerSetUp(viewer)); 2979 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 2980 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5)); 2981 if (isbinary) { 2982 PetscCall(MatLoad_MPIAIJ_Binary(newMat,viewer)); 2983 } else if (ishdf5) { 2984 #if defined(PETSC_HAVE_HDF5) 2985 PetscCall(MatLoad_AIJ_HDF5(newMat,viewer)); 2986 #else 2987 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 2988 #endif 2989 } else { 2990 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2991 } 2992 PetscFunctionReturn(0); 2993 } 2994 2995 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 2996 { 2997 PetscInt header[4],M,N,m,nz,rows,cols,sum,i; 2998 PetscInt *rowidxs,*colidxs; 2999 PetscScalar *matvals; 3000 3001 PetscFunctionBegin; 3002 PetscCall(PetscViewerSetUp(viewer)); 3003 3004 /* read in matrix header */ 3005 PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT)); 3006 PetscCheck(header[0] == MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3007 M = header[1]; N = header[2]; nz = header[3]; 3008 PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M); 3009 PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N); 3010 PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 3011 3012 /* set block sizes from the viewer's .info file */ 3013 PetscCall(MatLoad_Binary_BlockSizes(mat,viewer)); 3014 /* set global sizes if not set already */ 3015 if (mat->rmap->N < 0) mat->rmap->N = M; 3016 if (mat->cmap->N < 0) mat->cmap->N = N; 3017 PetscCall(PetscLayoutSetUp(mat->rmap)); 3018 PetscCall(PetscLayoutSetUp(mat->cmap)); 3019 3020 /* check if the matrix sizes are correct */ 3021 PetscCall(MatGetSize(mat,&rows,&cols)); 3022 PetscCheck(M == rows && N == cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT 
", %" PetscInt_FMT ")",M,N,rows,cols); 3023 3024 /* read in row lengths and build row indices */ 3025 PetscCall(MatGetLocalSize(mat,&m,NULL)); 3026 PetscCall(PetscMalloc1(m+1,&rowidxs)); 3027 PetscCall(PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT)); 3028 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3029 PetscCall(MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer))); 3030 PetscCheck(sum == nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum); 3031 /* read in column indices and matrix values */ 3032 PetscCall(PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals)); 3033 PetscCall(PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT)); 3034 PetscCall(PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR)); 3035 /* store matrix indices and values */ 3036 PetscCall(MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals)); 3037 PetscCall(PetscFree(rowidxs)); 3038 PetscCall(PetscFree2(colidxs,matvals)); 3039 PetscFunctionReturn(0); 3040 } 3041 3042 /* Not scalable because of ISAllGather() unless getting all columns. */ 3043 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3044 { 3045 IS iscol_local; 3046 PetscBool isstride; 3047 PetscMPIInt lisstride=0,gisstride; 3048 3049 PetscFunctionBegin; 3050 /* check if we are grabbing all columns*/ 3051 PetscCall(PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride)); 3052 3053 if (isstride) { 3054 PetscInt start,len,mstart,mlen; 3055 PetscCall(ISStrideGetInfo(iscol,&start,NULL)); 3056 PetscCall(ISGetLocalSize(iscol,&len)); 3057 PetscCall(MatGetOwnershipRangeColumn(mat,&mstart,&mlen)); 3058 if (mstart == start && mlen-mstart == len) lisstride = 1; 3059 } 3060 3061 PetscCall(MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat))); 3062 if (gisstride) { 3063 PetscInt N; 3064 PetscCall(MatGetSize(mat,NULL,&N)); 3065 PetscCall(ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local)); 3066 PetscCall(ISSetIdentity(iscol_local)); 3067 PetscCall(PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3068 } else { 3069 PetscInt cbs; 3070 PetscCall(ISGetBlockSize(iscol,&cbs)); 3071 PetscCall(ISAllGather(iscol,&iscol_local)); 3072 PetscCall(ISSetBlockSize(iscol_local,cbs)); 3073 } 3074 3075 *isseq = iscol_local; 3076 PetscFunctionReturn(0); 3077 } 3078 3079 /* 3080 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3081 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3082 3083 Input Parameters: 3084 mat - matrix 3085 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3086 i.e., mat->rstart <= isrow[i] < mat->rend 3087 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3088 i.e., mat->cstart <= iscol[i] < mat->cend 3089 Output Parameter: 3090 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3091 iscol_o - sequential column index set for retrieving mat->B 3092 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3093 */ 3094 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3095 { 3096 Vec x,cmap; 3097 const PetscInt *is_idx; 3098 
PetscScalar *xarray,*cmaparray; 3099 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3100 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3101 Mat B=a->B; 3102 Vec lvec=a->lvec,lcmap; 3103 PetscInt i,cstart,cend,Bn=B->cmap->N; 3104 MPI_Comm comm; 3105 VecScatter Mvctx=a->Mvctx; 3106 3107 PetscFunctionBegin; 3108 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3109 PetscCall(ISGetLocalSize(iscol,&ncols)); 3110 3111 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3112 PetscCall(MatCreateVecs(mat,&x,NULL)); 3113 PetscCall(VecSet(x,-1.0)); 3114 PetscCall(VecDuplicate(x,&cmap)); 3115 PetscCall(VecSet(cmap,-1.0)); 3116 3117 /* Get start indices */ 3118 PetscCallMPI(MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm)); 3119 isstart -= ncols; 3120 PetscCall(MatGetOwnershipRangeColumn(mat,&cstart,&cend)); 3121 3122 PetscCall(ISGetIndices(iscol,&is_idx)); 3123 PetscCall(VecGetArray(x,&xarray)); 3124 PetscCall(VecGetArray(cmap,&cmaparray)); 3125 PetscCall(PetscMalloc1(ncols,&idx)); 3126 for (i=0; i<ncols; i++) { 3127 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3128 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3129 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3130 } 3131 PetscCall(VecRestoreArray(x,&xarray)); 3132 PetscCall(VecRestoreArray(cmap,&cmaparray)); 3133 PetscCall(ISRestoreIndices(iscol,&is_idx)); 3134 3135 /* Get iscol_d */ 3136 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d)); 3137 PetscCall(ISGetBlockSize(iscol,&i)); 3138 PetscCall(ISSetBlockSize(*iscol_d,i)); 3139 3140 /* Get isrow_d */ 3141 PetscCall(ISGetLocalSize(isrow,&m)); 3142 rstart = mat->rmap->rstart; 3143 PetscCall(PetscMalloc1(m,&idx)); 3144 PetscCall(ISGetIndices(isrow,&is_idx)); 3145 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3146 PetscCall(ISRestoreIndices(isrow,&is_idx)); 3147 3148 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d)); 3149 PetscCall(ISGetBlockSize(isrow,&i)); 3150 PetscCall(ISSetBlockSize(*isrow_d,i)); 3151 3152 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3153 PetscCall(VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD)); 3154 PetscCall(VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD)); 3155 3156 PetscCall(VecDuplicate(lvec,&lcmap)); 3157 3158 PetscCall(VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD)); 3159 PetscCall(VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD)); 3160 3161 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3162 /* off-process column indices */ 3163 count = 0; 3164 PetscCall(PetscMalloc1(Bn,&idx)); 3165 PetscCall(PetscMalloc1(Bn,&cmap1)); 3166 3167 PetscCall(VecGetArray(lvec,&xarray)); 3168 PetscCall(VecGetArray(lcmap,&cmaparray)); 3169 for (i=0; i<Bn; i++) { 3170 if (PetscRealPart(xarray[i]) > -1.0) { 3171 idx[count] = i; /* local column index in off-diagonal part B */ 3172 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3173 count++; 3174 } 3175 } 3176 PetscCall(VecRestoreArray(lvec,&xarray)); 3177 PetscCall(VecRestoreArray(lcmap,&cmaparray)); 3178 3179 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o)); 3180 /* cannot ensure iscol_o has same blocksize as iscol! 
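     (iscol_o keeps only those selected columns that live off-process in B, so the block structure of iscol need not carry over)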
*/ 3181 3182 PetscCall(PetscFree(idx)); 3183 *garray = cmap1; 3184 3185 PetscCall(VecDestroy(&x)); 3186 PetscCall(VecDestroy(&cmap)); 3187 PetscCall(VecDestroy(&lcmap)); 3188 PetscFunctionReturn(0); 3189 } 3190 3191 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3192 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3193 { 3194 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3195 Mat M = NULL; 3196 MPI_Comm comm; 3197 IS iscol_d,isrow_d,iscol_o; 3198 Mat Asub = NULL,Bsub = NULL; 3199 PetscInt n; 3200 3201 PetscFunctionBegin; 3202 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3203 3204 if (call == MAT_REUSE_MATRIX) { 3205 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3206 PetscCall(PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d)); 3207 PetscCheck(isrow_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3208 3209 PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d)); 3210 PetscCheck(iscol_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3211 3212 PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o)); 3213 PetscCheck(iscol_o,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3214 3215 /* Update diagonal and off-diagonal portions of submat */ 3216 asub = (Mat_MPIAIJ*)(*submat)->data; 3217 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A)); 3218 PetscCall(ISGetLocalSize(iscol_o,&n)); 3219 if (n) { 3220 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B)); 3221 } 3222 PetscCall(MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY)); 3223 PetscCall(MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY)); 3224 3225 } else { /* call == MAT_INITIAL_MATRIX) */ 3226 const PetscInt *garray; 3227 PetscInt BsubN; 3228 3229 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
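       isrow_d/iscol_d address the diagonal block a->A, iscol_o addresses the off-diagonal block a->B,
       and garray[i] records where iscol_o[i] appears in iscol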
*/ 3230 PetscCall(ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray)); 3231 3232 /* Create local submatrices Asub and Bsub */ 3233 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub)); 3234 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub)); 3235 3236 /* Create submatrix M */ 3237 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M)); 3238 3239 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3240 asub = (Mat_MPIAIJ*)M->data; 3241 3242 PetscCall(ISGetLocalSize(iscol_o,&BsubN)); 3243 n = asub->B->cmap->N; 3244 if (BsubN > n) { 3245 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3246 const PetscInt *idx; 3247 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3248 PetscCall(PetscInfo(M,"submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n",n,BsubN)); 3249 3250 PetscCall(PetscMalloc1(n,&idx_new)); 3251 j = 0; 3252 PetscCall(ISGetIndices(iscol_o,&idx)); 3253 for (i=0; i<n; i++) { 3254 if (j >= BsubN) break; 3255 while (subgarray[i] > garray[j]) j++; 3256 3257 if (subgarray[i] == garray[j]) { 3258 idx_new[i] = idx[j++]; 3259 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT,i,subgarray[i],j,garray[j]); 3260 } 3261 PetscCall(ISRestoreIndices(iscol_o,&idx)); 3262 3263 PetscCall(ISDestroy(&iscol_o)); 3264 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o)); 3265 3266 } else if (BsubN < n) { 3267 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")",BsubN,asub->B->cmap->N); 3268 } 3269 3270 PetscCall(PetscFree(garray)); 3271 *submat = M; 3272 3273 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3274 PetscCall(PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d)); 3275 PetscCall(ISDestroy(&isrow_d)); 3276 3277 PetscCall(PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d)); 3278 PetscCall(ISDestroy(&iscol_d)); 3279 3280 PetscCall(PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o)); 3281 PetscCall(ISDestroy(&iscol_o)); 3282 } 3283 PetscFunctionReturn(0); 3284 } 3285 3286 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3287 { 3288 IS iscol_local=NULL,isrow_d; 3289 PetscInt csize; 3290 PetscInt n,i,j,start,end; 3291 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3292 MPI_Comm comm; 3293 3294 PetscFunctionBegin; 3295 /* If isrow has same processor distribution as mat, 3296 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3297 if (call == MAT_REUSE_MATRIX) { 3298 PetscCall(PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d)); 3299 if (isrow_d) { 3300 sameRowDist = PETSC_TRUE; 3301 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3302 } else { 3303 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local)); 3304 if (iscol_local) { 3305 sameRowDist = PETSC_TRUE; 3306 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3307 } 3308 } 3309 } else { 3310 /* Check if isrow has same processor distribution as mat */ 3311 sameDist[0] = PETSC_FALSE; 3312 PetscCall(ISGetLocalSize(isrow,&n)); 3313 if (!n) { 3314 sameDist[0] = PETSC_TRUE; 3315 } 
else { 3316 PetscCall(ISGetMinMax(isrow,&i,&j)); 3317 PetscCall(MatGetOwnershipRange(mat,&start,&end)); 3318 if (i >= start && j < end) { 3319 sameDist[0] = PETSC_TRUE; 3320 } 3321 } 3322 3323 /* Check if iscol has same processor distribution as mat */ 3324 sameDist[1] = PETSC_FALSE; 3325 PetscCall(ISGetLocalSize(iscol,&n)); 3326 if (!n) { 3327 sameDist[1] = PETSC_TRUE; 3328 } else { 3329 PetscCall(ISGetMinMax(iscol,&i,&j)); 3330 PetscCall(MatGetOwnershipRangeColumn(mat,&start,&end)); 3331 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3332 } 3333 3334 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3335 PetscCall(MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm)); 3336 sameRowDist = tsameDist[0]; 3337 } 3338 3339 if (sameRowDist) { 3340 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3341 /* isrow and iscol have same processor distribution as mat */ 3342 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat)); 3343 PetscFunctionReturn(0); 3344 } else { /* sameRowDist */ 3345 /* isrow has same processor distribution as mat */ 3346 if (call == MAT_INITIAL_MATRIX) { 3347 PetscBool sorted; 3348 PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local)); 3349 PetscCall(ISGetLocalSize(iscol_local,&n)); /* local size of iscol_local = global columns of newmat */ 3350 PetscCall(ISGetSize(iscol,&i)); 3351 PetscCheck(n == i,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT,n,i); 3352 3353 PetscCall(ISSorted(iscol_local,&sorted)); 3354 if (sorted) { 3355 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3356 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat)); 3357 PetscFunctionReturn(0); 3358 } 3359 } else { /* call == MAT_REUSE_MATRIX */ 3360 IS iscol_sub; 3361 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub)); 3362 if (iscol_sub) { 3363 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat)); 3364 PetscFunctionReturn(0); 3365 } 3366 } 3367 } 3368 } 3369 3370 /* General case: iscol -> iscol_local which has global size of iscol */ 3371 if (call == MAT_REUSE_MATRIX) { 3372 PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local)); 3373 PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3374 } else { 3375 if (!iscol_local) { 3376 PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local)); 3377 } 3378 } 3379 3380 PetscCall(ISGetLocalSize(iscol,&csize)); 3381 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat)); 3382 3383 if (call == MAT_INITIAL_MATRIX) { 3384 PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local)); 3385 PetscCall(ISDestroy(&iscol_local)); 3386 } 3387 PetscFunctionReturn(0); 3388 } 3389 3390 /*@C 3391 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3392 and "off-diagonal" part of the matrix in CSR format. 3393 3394 Collective 3395 3396 Input Parameters: 3397 + comm - MPI communicator 3398 . A - "diagonal" portion of matrix 3399 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3400 - garray - global index of B columns 3401 3402 Output Parameter: 3403 . 
mat - the matrix, with input A as its local diagonal matrix 3404 Level: advanced 3405 3406 Notes: 3407 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3408 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3409 3410 .seealso: `MatCreateMPIAIJWithSplitArrays()` 3411 @*/ 3412 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3413 { 3414 Mat_MPIAIJ *maij; 3415 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3416 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3417 const PetscScalar *oa; 3418 Mat Bnew; 3419 PetscInt m,n,N; 3420 3421 PetscFunctionBegin; 3422 PetscCall(MatCreate(comm,mat)); 3423 PetscCall(MatGetSize(A,&m,&n)); 3424 PetscCheck(m == B->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %" PetscInt_FMT " != Bm %" PetscInt_FMT,m,B->rmap->N); 3425 PetscCheck(A->rmap->bs == B->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT,A->rmap->bs,B->rmap->bs); 3426 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3427 /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */ 3428 3429 /* Get global columns of mat */ 3430 PetscCall(MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm)); 3431 3432 PetscCall(MatSetSizes(*mat,m,n,PETSC_DECIDE,N)); 3433 PetscCall(MatSetType(*mat,MATMPIAIJ)); 3434 PetscCall(MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs)); 3435 maij = (Mat_MPIAIJ*)(*mat)->data; 3436 3437 (*mat)->preallocated = PETSC_TRUE; 3438 3439 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 3440 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 3441 3442 /* Set A as diagonal portion of *mat */ 3443 maij->A = A; 3444 3445 nz = oi[m]; 3446 for (i=0; i<nz; i++) { 3447 col = oj[i]; 3448 oj[i] = garray[col]; 3449 } 3450 3451 /* Set Bnew as off-diagonal portion of *mat */ 3452 PetscCall(MatSeqAIJGetArrayRead(B,&oa)); 3453 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew)); 3454 PetscCall(MatSeqAIJRestoreArrayRead(B,&oa)); 3455 bnew = (Mat_SeqAIJ*)Bnew->data; 3456 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3457 maij->B = Bnew; 3458 3459 PetscCheck(B->rmap->N == Bnew->rmap->N,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT,B->rmap->N,Bnew->rmap->N); 3460 3461 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3462 b->free_a = PETSC_FALSE; 3463 b->free_ij = PETSC_FALSE; 3464 PetscCall(MatDestroy(&B)); 3465 3466 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3467 bnew->free_a = PETSC_TRUE; 3468 bnew->free_ij = PETSC_TRUE; 3469 3470 /* condense columns of maij->B */ 3471 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 3472 PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY)); 3473 PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY)); 3474 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE)); 3475 PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 3476 PetscFunctionReturn(0); 3477 } 3478 3479 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3480 3481 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3482 { 3483 PetscInt 
i,m,n,rstart,row,rend,nz,j,bs,cbs; 3484 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3485 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3486 Mat M,Msub,B=a->B; 3487 MatScalar *aa; 3488 Mat_SeqAIJ *aij; 3489 PetscInt *garray = a->garray,*colsub,Ncols; 3490 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3491 IS iscol_sub,iscmap; 3492 const PetscInt *is_idx,*cmap; 3493 PetscBool allcolumns=PETSC_FALSE; 3494 MPI_Comm comm; 3495 3496 PetscFunctionBegin; 3497 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3498 if (call == MAT_REUSE_MATRIX) { 3499 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub)); 3500 PetscCheck(iscol_sub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3501 PetscCall(ISGetLocalSize(iscol_sub,&count)); 3502 3503 PetscCall(PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap)); 3504 PetscCheck(iscmap,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3505 3506 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub)); 3507 PetscCheck(Msub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3508 3509 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub)); 3510 3511 } else { /* call == MAT_INITIAL_MATRIX) */ 3512 PetscBool flg; 3513 3514 PetscCall(ISGetLocalSize(iscol,&n)); 3515 PetscCall(ISGetSize(iscol,&Ncols)); 3516 3517 /* (1) iscol -> nonscalable iscol_local */ 3518 /* Check for special case: each processor gets entire matrix columns */ 3519 PetscCall(ISIdentity(iscol_local,&flg)); 3520 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3521 PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat))); 3522 if (allcolumns) { 3523 iscol_sub = iscol_local; 3524 PetscCall(PetscObjectReference((PetscObject)iscol_local)); 3525 PetscCall(ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap)); 3526 3527 } else { 3528 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3529 PetscInt *idx,*cmap1,k; 3530 PetscCall(PetscMalloc1(Ncols,&idx)); 3531 PetscCall(PetscMalloc1(Ncols,&cmap1)); 3532 PetscCall(ISGetIndices(iscol_local,&is_idx)); 3533 count = 0; 3534 k = 0; 3535 for (i=0; i<Ncols; i++) { 3536 j = is_idx[i]; 3537 if (j >= cstart && j < cend) { 3538 /* diagonal part of mat */ 3539 idx[count] = j; 3540 cmap1[count++] = i; /* column index in submat */ 3541 } else if (Bn) { 3542 /* off-diagonal part of mat */ 3543 if (j == garray[k]) { 3544 idx[count] = j; 3545 cmap1[count++] = i; /* column index in submat */ 3546 } else if (j > garray[k]) { 3547 while (j > garray[k] && k < Bn-1) k++; 3548 if (j == garray[k]) { 3549 idx[count] = j; 3550 cmap1[count++] = i; /* column index in submat */ 3551 } 3552 } 3553 } 3554 } 3555 PetscCall(ISRestoreIndices(iscol_local,&is_idx)); 3556 3557 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub)); 3558 PetscCall(ISGetBlockSize(iscol,&cbs)); 3559 PetscCall(ISSetBlockSize(iscol_sub,cbs)); 3560 3561 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap)); 3562 } 3563 3564 /* (3) Create sequential Msub */ 3565 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub)); 3566 } 3567 3568 PetscCall(ISGetLocalSize(iscol_sub,&count)); 3569 aij = (Mat_SeqAIJ*)(Msub)->data; 3570 ii = aij->i; 3571 PetscCall(ISGetIndices(iscmap,&cmap)); 3572 3573 /* 3574 m - number of local rows 3575 Ncols - number of columns (same on all processors) 3576 rstart - first row in new global matrix generated 3577 */ 3578 PetscCall(MatGetSize(Msub,&m,NULL)); 3579 3580 if (call == MAT_INITIAL_MATRIX) { 3581 /* (4) Create parallel newmat */ 3582 PetscMPIInt rank,size; 3583 PetscInt csize; 3584 3585 PetscCallMPI(MPI_Comm_size(comm,&size)); 3586 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 3587 3588 /* 3589 Determine the number of non-zeros in the diagonal and off-diagonal 3590 portions of the matrix in order to do correct preallocation 3591 */ 3592 3593 /* first get start and end of "diagonal" columns */ 3594 PetscCall(ISGetLocalSize(iscol,&csize)); 3595 if (csize == PETSC_DECIDE) { 3596 PetscCall(ISGetSize(isrow,&mglobal)); 3597 if (mglobal == Ncols) { /* square matrix */ 3598 nlocal = m; 3599 } else { 3600 nlocal = Ncols/size + ((Ncols % size) > rank); 3601 } 3602 } else { 3603 nlocal = csize; 3604 } 3605 PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm)); 3606 rstart = rend - nlocal; 3607 PetscCheck(rank != size - 1 || rend == Ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,Ncols); 3608 3609 /* next, compute all the lengths */ 3610 jj = aij->j; 3611 PetscCall(PetscMalloc1(2*m+1,&dlens)); 3612 olens = dlens + m; 3613 for (i=0; i<m; i++) { 3614 jend = ii[i+1] - ii[i]; 3615 olen = 0; 3616 dlen = 0; 3617 for (j=0; j<jend; j++) { 3618 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3619 else dlen++; 3620 jj++; 3621 } 3622 olens[i] = olen; 3623 dlens[i] = dlen; 3624 } 3625 3626 PetscCall(ISGetBlockSize(isrow,&bs)); 3627 PetscCall(ISGetBlockSize(iscol,&cbs)); 3628 3629 PetscCall(MatCreate(comm,&M)); 3630 PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols)); 3631 PetscCall(MatSetBlockSizes(M,bs,cbs)); 3632 PetscCall(MatSetType(M,((PetscObject)mat)->type_name)); 3633 PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens)); 3634 
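    /* dlens and olens share one allocation (olens = dlens + m above), so freeing dlens releases both */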
PetscCall(PetscFree(dlens));

  } else { /* call == MAT_REUSE_MATRIX */
    M = *newmat;
    PetscCall(MatGetLocalSize(M,&i,NULL));
    PetscCheck(i == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
       The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  PetscCall(PetscMalloc1(count,&colsub));
  PetscCall(MatGetOwnershipRange(M,&rstart,NULL));

  jj = aij->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa));
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz  = ii[i+1] - ii[i];
    for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
    PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES));
    jj += nz; aa += nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa));
  PetscCall(ISRestoreIndices(iscmap,&cmap));

  PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap on this process for the next reuse request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub));
    PetscCall(MatDestroy(&Msub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local));
      PetscCall(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(0);
}

/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ on each process,
  then the final result by concatenating those local matrices. Writing it directly would be
  much like MatCreateSubMatrices_MPIAIJ()

    Note: This requires a sequential iscol with all indices.
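    (that is, each process must supply the full global list of selected column indices, which is why this path is not scalable)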
3696 */ 3697 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3698 { 3699 PetscMPIInt rank,size; 3700 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3701 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3702 Mat M,Mreuse; 3703 MatScalar *aa,*vwork; 3704 MPI_Comm comm; 3705 Mat_SeqAIJ *aij; 3706 PetscBool colflag,allcolumns=PETSC_FALSE; 3707 3708 PetscFunctionBegin; 3709 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3710 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 3711 PetscCallMPI(MPI_Comm_size(comm,&size)); 3712 3713 /* Check for special case: each processor gets entire matrix columns */ 3714 PetscCall(ISIdentity(iscol,&colflag)); 3715 PetscCall(ISGetLocalSize(iscol,&n)); 3716 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3717 PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat))); 3718 3719 if (call == MAT_REUSE_MATRIX) { 3720 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse)); 3721 PetscCheck(Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3722 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse)); 3723 } else { 3724 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse)); 3725 } 3726 3727 /* 3728 m - number of local rows 3729 n - number of columns (same on all processors) 3730 rstart - first row in new global matrix generated 3731 */ 3732 PetscCall(MatGetSize(Mreuse,&m,&n)); 3733 PetscCall(MatGetBlockSizes(Mreuse,&bs,&cbs)); 3734 if (call == MAT_INITIAL_MATRIX) { 3735 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3736 ii = aij->i; 3737 jj = aij->j; 3738 3739 /* 3740 Determine the number of non-zeros in the diagonal and off-diagonal 3741 portions of the matrix in order to do correct preallocation 3742 */ 3743 3744 /* first get start and end of "diagonal" columns */ 3745 if (csize == PETSC_DECIDE) { 3746 PetscCall(ISGetSize(isrow,&mglobal)); 3747 if (mglobal == n) { /* square matrix */ 3748 nlocal = m; 3749 } else { 3750 nlocal = n/size + ((n % size) > rank); 3751 } 3752 } else { 3753 nlocal = csize; 3754 } 3755 PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm)); 3756 rstart = rend - nlocal; 3757 PetscCheck(rank != size - 1 || rend == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n); 3758 3759 /* next, compute all the lengths */ 3760 PetscCall(PetscMalloc1(2*m+1,&dlens)); 3761 olens = dlens + m; 3762 for (i=0; i<m; i++) { 3763 jend = ii[i+1] - ii[i]; 3764 olen = 0; 3765 dlen = 0; 3766 for (j=0; j<jend; j++) { 3767 if (*jj < rstart || *jj >= rend) olen++; 3768 else dlen++; 3769 jj++; 3770 } 3771 olens[i] = olen; 3772 dlens[i] = dlen; 3773 } 3774 PetscCall(MatCreate(comm,&M)); 3775 PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,n)); 3776 PetscCall(MatSetBlockSizes(M,bs,cbs)); 3777 PetscCall(MatSetType(M,((PetscObject)mat)->type_name)); 3778 PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens)); 3779 PetscCall(PetscFree(dlens)); 3780 } else { 3781 PetscInt ml,nl; 3782 3783 M = *newmat; 3784 PetscCall(MatGetLocalSize(M,&ml,&nl)); 3785 PetscCheck(ml == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3786 PetscCall(MatZeroEntries(M)); 3787 /* 3788 The next two lines are needed so we may call 
MatSetValues_MPIAIJ() below directly, 3789 rather than the slower MatSetValues(). 3790 */ 3791 M->was_assembled = PETSC_TRUE; 3792 M->assembled = PETSC_FALSE; 3793 } 3794 PetscCall(MatGetOwnershipRange(M,&rstart,&rend)); 3795 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3796 ii = aij->i; 3797 jj = aij->j; 3798 3799 /* trigger copy to CPU if needed */ 3800 PetscCall(MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa)); 3801 for (i=0; i<m; i++) { 3802 row = rstart + i; 3803 nz = ii[i+1] - ii[i]; 3804 cwork = jj; jj += nz; 3805 vwork = aa; aa += nz; 3806 PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES)); 3807 } 3808 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa)); 3809 3810 PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY)); 3811 PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY)); 3812 *newmat = M; 3813 3814 /* save submatrix used in processor for next request */ 3815 if (call == MAT_INITIAL_MATRIX) { 3816 PetscCall(PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse)); 3817 PetscCall(MatDestroy(&Mreuse)); 3818 } 3819 PetscFunctionReturn(0); 3820 } 3821 3822 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3823 { 3824 PetscInt m,cstart, cend,j,nnz,i,d,*ld; 3825 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3826 const PetscInt *JJ; 3827 PetscBool nooffprocentries; 3828 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)B->data; 3829 3830 PetscFunctionBegin; 3831 PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %" PetscInt_FMT,Ii[0]); 3832 3833 PetscCall(PetscLayoutSetUp(B->rmap)); 3834 PetscCall(PetscLayoutSetUp(B->cmap)); 3835 m = B->rmap->n; 3836 cstart = B->cmap->rstart; 3837 cend = B->cmap->rend; 3838 rstart = B->rmap->rstart; 3839 3840 PetscCall(PetscCalloc2(m,&d_nnz,m,&o_nnz)); 3841 3842 if (PetscDefined(USE_DEBUG)) { 3843 for (i=0; i<m; i++) { 3844 nnz = Ii[i+1]- Ii[i]; 3845 JJ = J + Ii[i]; 3846 PetscCheck(nnz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns",i,nnz); 3847 PetscCheck(!nnz || !(JJ[0] < 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT,i,JJ[0]); 3848 PetscCheck(!nnz || !(JJ[nnz-1] >= B->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")",i,JJ[nnz-1],B->cmap->N); 3849 } 3850 } 3851 3852 for (i=0; i<m; i++) { 3853 nnz = Ii[i+1]- Ii[i]; 3854 JJ = J + Ii[i]; 3855 nnz_max = PetscMax(nnz_max,nnz); 3856 d = 0; 3857 for (j=0; j<nnz; j++) { 3858 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3859 } 3860 d_nnz[i] = d; 3861 o_nnz[i] = nnz - d; 3862 } 3863 PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz)); 3864 PetscCall(PetscFree2(d_nnz,o_nnz)); 3865 3866 for (i=0; i<m; i++) { 3867 ii = i + rstart; 3868 PetscCall(MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? 
v + Ii[i] : NULL,INSERT_VALUES)); 3869 } 3870 nooffprocentries = B->nooffprocentries; 3871 B->nooffprocentries = PETSC_TRUE; 3872 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 3873 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 3874 B->nooffprocentries = nooffprocentries; 3875 3876 /* count number of entries below block diagonal */ 3877 PetscCall(PetscFree(Aij->ld)); 3878 PetscCall(PetscCalloc1(m,&ld)); 3879 Aij->ld = ld; 3880 for (i=0; i<m; i++) { 3881 nnz = Ii[i+1] - Ii[i]; 3882 j = 0; 3883 while (j < nnz && J[j] < cstart) {j++;} 3884 ld[i] = j; 3885 J += nnz; 3886 } 3887 3888 PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 3889 PetscFunctionReturn(0); 3890 } 3891 3892 /*@ 3893 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3894 (the default parallel PETSc format). 3895 3896 Collective 3897 3898 Input Parameters: 3899 + B - the matrix 3900 . i - the indices into j for the start of each local row (starts with zero) 3901 . j - the column indices for each local row (starts with zero) 3902 - v - optional values in the matrix 3903 3904 Level: developer 3905 3906 Notes: 3907 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3908 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3909 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3910 3911 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3912 3913 The format which is used for the sparse matrix input, is equivalent to a 3914 row-major ordering.. i.e for the following matrix, the input data expected is 3915 as shown 3916 3917 $ 1 0 0 3918 $ 2 0 3 P0 3919 $ ------- 3920 $ 4 5 6 P1 3921 $ 3922 $ Process0 [P0]: rows_owned=[0,1] 3923 $ i = {0,1,3} [size = nrow+1 = 2+1] 3924 $ j = {0,0,2} [size = 3] 3925 $ v = {1,2,3} [size = 3] 3926 $ 3927 $ Process1 [P1]: rows_owned=[2] 3928 $ i = {0,3} [size = nrow+1 = 1+1] 3929 $ j = {0,1,2} [size = 3] 3930 $ v = {4,5,6} [size = 3] 3931 3932 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, 3933 `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()` 3934 @*/ 3935 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3936 { 3937 PetscFunctionBegin; 3938 PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v)); 3939 PetscFunctionReturn(0); 3940 } 3941 3942 /*@C 3943 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3944 (the default parallel PETSc format). For good matrix assembly performance 3945 the user should preallocate the matrix storage by setting the parameters 3946 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3947 performance can be increased by more than a factor of 50. 3948 3949 Collective 3950 3951 Input Parameters: 3952 + B - the matrix 3953 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3954 (same value is used for all local rows) 3955 . d_nnz - array containing the number of nonzeros in the various rows of the 3956 DIAGONAL portion of the local submatrix (possibly different for each row) 3957 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3958 The size of this array is equal to the number of local rows, i.e 'm'. 
3959 For matrices that will be factored, you must leave room for (and set) 3960 the diagonal entry even if it is zero. 3961 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3962 submatrix (same value is used for all local rows). 3963 - o_nnz - array containing the number of nonzeros in the various rows of the 3964 OFF-DIAGONAL portion of the local submatrix (possibly different for 3965 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3966 structure. The size of this array is equal to the number 3967 of local rows, i.e 'm'. 3968 3969 If the *_nnz parameter is given then the *_nz parameter is ignored 3970 3971 The AIJ format (also called the Yale sparse matrix format or 3972 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3973 storage. The stored row and column indices begin with zero. 3974 See Users-Manual: ch_mat for details. 3975 3976 The parallel matrix is partitioned such that the first m0 rows belong to 3977 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3978 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3979 3980 The DIAGONAL portion of the local submatrix of a processor can be defined 3981 as the submatrix which is obtained by extraction the part corresponding to 3982 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3983 first row that belongs to the processor, r2 is the last row belonging to 3984 the this processor, and c1-c2 is range of indices of the local part of a 3985 vector suitable for applying the matrix to. This is an mxn matrix. In the 3986 common case of a square matrix, the row and column ranges are the same and 3987 the DIAGONAL part is also square. The remaining portion of the local 3988 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3989 3990 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3991 3992 You can call MatGetInfo() to get information on how effective the preallocation was; 3993 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3994 You can also run with the option -info and look for messages with the string 3995 malloc in them to see if additional memory allocation was needed. 3996 3997 Example usage: 3998 3999 Consider the following 8x8 matrix with 34 non-zero values, that is 4000 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4001 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4002 as follows: 4003 4004 .vb 4005 1 2 0 | 0 3 0 | 0 4 4006 Proc0 0 5 6 | 7 0 0 | 8 0 4007 9 0 10 | 11 0 0 | 12 0 4008 ------------------------------------- 4009 13 0 14 | 15 16 17 | 0 0 4010 Proc1 0 18 0 | 19 20 21 | 0 0 4011 0 0 0 | 22 23 0 | 24 0 4012 ------------------------------------- 4013 Proc2 25 26 27 | 0 0 28 | 29 0 4014 30 0 0 | 31 32 33 | 0 34 4015 .ve 4016 4017 This can be represented as a collection of submatrices as: 4018 4019 .vb 4020 A B C 4021 D E F 4022 G H I 4023 .ve 4024 4025 Where the submatrices A,B,C are owned by proc0, D,E,F are 4026 owned by proc1, G,H,I are owned by proc2. 4027 4028 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4029 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4030 The 'M','N' parameters are 8,8, and have the same values on all procs. 4031 4032 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4033 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4034 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e., 34, and
   hence the preallocation is perfect.
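   As a concrete sketch of the calling sequence (shown for the rank labeled proc1
   above; comm and the SPMD handling of per-rank values are assumed from context):
.vb
     Mat      A;
     PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
     MatCreate(comm,&A);
     MatSetSizes(A,3,3,PETSC_DETERMINE,PETSC_DETERMINE);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve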
   Level: intermediate

.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()`
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIAIJWithArrays - creates an MPIAIJ matrix using arrays that contain the local rows
   in standard CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and the i indices are indices corresponding to the local j array.

   The format used for the sparse matrix input is equivalent to a
   row-major ordering, i.e., for the following matrix, the input data expected is
   as shown

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

   Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays()

.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscFunctionBegin;
  PetscCheck(!i || !i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm,mat));
  PetscCall(MatSetSizes(*mat,m,n,M,N));
  /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
  PetscCall(MatSetType(*mat,MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat,i,j,a));
  PetscFunctionReturn(0);
}

/*@
   MatUpdateMPIAIJWithArrays - updates an MPIAIJ matrix using arrays that contain the local rows
   in standard CSR format. Only the numerical values are updated; the other arrays must be identical
   to those passed to MatCreateMPIAIJWithArrays()

   Deprecated: Use `MatUpdateMPIAIJWithArray()`

   Collective

   Input Parameters:
+  mat - the matrix
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
.
J - column indices 4165 - v - matrix values 4166 4167 Level: intermediate 4168 4169 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4170 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArray()` 4171 @*/ 4172 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4173 { 4174 PetscInt nnz,i; 4175 PetscBool nooffprocentries; 4176 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4177 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data; 4178 PetscScalar *ad,*ao; 4179 PetscInt ldi,Iii,md; 4180 const PetscInt *Adi = Ad->i; 4181 PetscInt *ld = Aij->ld; 4182 4183 PetscFunctionBegin; 4184 PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4185 PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4186 PetscCheck(m == mat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4187 PetscCheck(n == mat->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4188 4189 PetscCall(MatSeqAIJGetArrayWrite(Aij->A,&ad)); 4190 PetscCall(MatSeqAIJGetArrayWrite(Aij->B,&ao)); 4191 4192 for (i=0; i<m; i++) { 4193 nnz = Ii[i+1]- Ii[i]; 4194 Iii = Ii[i]; 4195 ldi = ld[i]; 4196 md = Adi[i+1]-Adi[i]; 4197 PetscCall(PetscArraycpy(ao,v + Iii,ldi)); 4198 PetscCall(PetscArraycpy(ad,v + Iii + ldi,md)); 4199 PetscCall(PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md)); 4200 ad += md; 4201 ao += nnz - md; 4202 } 4203 nooffprocentries = mat->nooffprocentries; 4204 mat->nooffprocentries = PETSC_TRUE; 4205 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A,&ad)); 4206 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B,&ao)); 4207 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4208 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4209 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4210 PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY)); 4211 PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY)); 4212 mat->nooffprocentries = nooffprocentries; 4213 PetscFunctionReturn(0); 4214 } 4215 4216 /*@ 4217 MatUpdateMPIAIJWithArray - updates an MPI AIJ matrix using an array that contains the nonzero values 4218 4219 Collective 4220 4221 Input Parameters: 4222 + mat - the matrix 4223 - v - matrix values, stored by row 4224 4225 Level: intermediate 4226 4227 Notes: 4228 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4229 4230 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4231 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArrays()` 4232 @*/ 4233 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat,const PetscScalar v[]) 4234 { 4235 PetscInt nnz,i,m; 4236 PetscBool nooffprocentries; 4237 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4238 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data; 4239 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)Aij->B->data; 4240 PetscScalar *ad,*ao; 4241 const PetscInt *Adi = Ad->i,*Adj = Ao->i; 4242 PetscInt ldi,Iii,md; 4243 PetscInt *ld = Aij->ld; 4244 4245 PetscFunctionBegin; 4246 m = mat->rmap->n; 4247 4248 
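  /* v stores each local row contiguously: first the ld[i] off-diagonal entries that precede the
     diagonal block, then the diagonal-block entries, then the trailing off-diagonal entries;
     split each row between the value arrays of A (diagonal) and B (off-diagonal) accordingly */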
PetscCall(MatSeqAIJGetArrayWrite(Aij->A,&ad)); 4249 PetscCall(MatSeqAIJGetArrayWrite(Aij->B,&ao)); 4250 Iii = 0; 4251 for (i=0; i<m; i++) { 4252 nnz = Adi[i+1]-Adi[i] + Adj[i+1]-Adj[i]; 4253 ldi = ld[i]; 4254 md = Adi[i+1]-Adi[i]; 4255 PetscCall(PetscArraycpy(ao,v + Iii,ldi)); 4256 PetscCall(PetscArraycpy(ad,v + Iii + ldi,md)); 4257 PetscCall(PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md)); 4258 ad += md; 4259 ao += nnz - md; 4260 Iii += nnz; 4261 } 4262 nooffprocentries = mat->nooffprocentries; 4263 mat->nooffprocentries = PETSC_TRUE; 4264 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A,&ad)); 4265 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B,&ao)); 4266 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4267 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4268 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4269 PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY)); 4270 PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY)); 4271 mat->nooffprocentries = nooffprocentries; 4272 PetscFunctionReturn(0); 4273 } 4274 4275 /*@C 4276 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4277 (the default parallel PETSc format). For good matrix assembly performance 4278 the user should preallocate the matrix storage by setting the parameters 4279 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4280 performance can be increased by more than a factor of 50. 4281 4282 Collective 4283 4284 Input Parameters: 4285 + comm - MPI communicator 4286 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4287 This value should be the same as the local size used in creating the 4288 y vector for the matrix-vector product y = Ax. 4289 . n - This value should be the same as the local size used in creating the 4290 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4291 calculated if N is given) For square matrices n is almost always m. 4292 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4293 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4294 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4295 (same value is used for all local rows) 4296 . d_nnz - array containing the number of nonzeros in the various rows of the 4297 DIAGONAL portion of the local submatrix (possibly different for each row) 4298 or NULL, if d_nz is used to specify the nonzero structure. 4299 The size of this array is equal to the number of local rows, i.e 'm'. 4300 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4301 submatrix (same value is used for all local rows). 4302 - o_nnz - array containing the number of nonzeros in the various rows of the 4303 OFF-DIAGONAL portion of the local submatrix (possibly different for 4304 each row) or NULL, if o_nz is used to specify the nonzero 4305 structure. The size of this array is equal to the number 4306 of local rows, i.e 'm'. 4307 4308 Output Parameter: 4309 . A - the matrix 4310 4311 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4312 MatXXXXSetPreallocation() paradigm instead of this routine directly. 
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   The m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2 etc., where
   m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition etc., where n0,n1,n2,... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor, i.e., the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1]
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism
.vb
   MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
.ve

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices.
        See viewer types in manual of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter to be viewed as a matrix.
        Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one MatMult() call.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e., 34, and
   hence the preallocation is perfect.
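   As a concrete sketch of a direct call (again for the rank labeled proc1 above;
   comm and the SPMD handling of per-rank values are assumed from context):
.vb
     Mat      A;
     PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
     MatCreateAIJ(comm,3,3,PETSC_DETERMINE,PETSC_DETERMINE,0,d_nnz,0,o_nnz,&A);
.ve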
   This division can be shown as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

   where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0, proc1, proc2 are 3, 3, 2 respectively.
   The 'n' parameters for proc0, proc1, proc2 are 3, 3, 2 respectively.
   The 'M','N' parameters are 8, 8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0, proc1, proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0, proc1, proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. E.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz, o_nz are
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. I.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz, o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34; hence
   the preallocation is perfect.

   Level: intermediate

.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`
@*/
PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm,A));
  PetscCall(MatSetSizes(*A,m,n,M,N));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  if (size > 1) {
    PetscCall(MatSetType(*A,MATMPIAIJ));
    PetscCall(MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz));
  } else {
    PetscCall(MatSetType(*A,MATSEQAIJ));
    PetscCall(MatSeqAIJSetPreallocation(*A,d_nz,d_nnz));
  }
  PetscFunctionReturn(0);
}
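/*
   A minimal usage sketch (hypothetical, mirroring rank 0 of the 8x8 example in the
   manual page above; not a complete program). d_nnz/o_nnz hold the exact per-row
   counts for proc0:

     Mat      A;
     PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
     PetscCall(MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A));
     ... insert values with MatSetValues(), then MatAssemblyBegin/End() ...
     PetscCall(MatDestroy(&A));
*/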
/*@C
   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix

   Not collective

   Input Parameter:
.  A - The MPIAIJ matrix

   Output Parameters:
+  Ad - The local diagonal block as a SeqAIJ matrix
.  Ao - The local off-diagonal block as a SeqAIJ matrix
-  colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix

   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
   in Ad are in [0, Nc) where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
   local column numbers to global column numbers in the original matrix.

   Level: intermediate

.seealso: `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ`
@*/
PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  PetscBool  flg;

  PetscFunctionBegin;
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg));
  PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
  if (Ad) *Ad = a->A;
  if (Ao) *Ao = a->B;
  if (colmap) *colmap = a->garray;
  PetscFunctionReturn(0);
}

PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscInt    m,N,i,rstart,nnz,Ii;
  PetscInt    *indx;
  PetscScalar *values;
  MatType     rootType;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat,&m,&N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt *dnz,*onz,sum,bs,cbs;

    if (n == PETSC_DECIDE) {
      PetscCall(PetscSplitOwnership(comm,&n,&N));
    }
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm));
    PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N);

    PetscCallMPI(MPI_Scan(&m,&rstart,1,MPIU_INT,MPI_SUM,comm));
    rstart -= m;

    MatPreallocateBegin(comm,m,n,dnz,onz);
    for (i=0; i<m; i++) {
      PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL));
      PetscCall(MatPreallocateSet(i+rstart,nnz,indx,dnz,onz));
      PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL));
    }

    PetscCall(MatCreate(comm,outmat));
    PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
    PetscCall(MatGetBlockSizes(inmat,&bs,&cbs));
    PetscCall(MatSetBlockSizes(*outmat,bs,cbs));
    PetscCall(MatGetRootType_Private(inmat,&rootType));
    PetscCall(MatSetType(*outmat,rootType));
    PetscCall(MatSeqAIJSetPreallocation(*outmat,0,dnz));
    PetscCall(MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz));
    MatPreallocateEnd(dnz,onz);
    PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
  }

  /* numeric phase */
  PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL));
  for (i=0; i<m; i++) {
    PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values));
    Ii = i + rstart;
    PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values));
  }
  PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
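/*
   A minimal usage sketch for MatMPIAIJGetSeqAIJ() (assumes an assembled MATMPIAIJ
   matrix A): inspect the two sequential blocks and translate a local off-diagonal
   column to a global column:

     Mat            Ad,Ao;
     const PetscInt *colmap;
     PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap));
     ... column j of Ao corresponds to global column colmap[j] of A ...
*/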
PetscErrorCode MatFileSplit(Mat A,char *outfile)
{
  PetscMPIInt       rank;
  PetscInt          m,N,i,rstart,nnz;
  size_t            len;
  const PetscInt    *indx;
  PetscViewer       out;
  char              *name;
  Mat               B;
  const PetscScalar *values;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A,&m,NULL));
  PetscCall(MatGetSize(A,NULL,&N));
  /* Should this be the type of the diagonal block of A? */
  PetscCall(MatCreate(PETSC_COMM_SELF,&B));
  PetscCall(MatSetSizes(B,m,N,m,N));
  PetscCall(MatSetBlockSizesFromMats(B,A,A));
  PetscCall(MatSetType(B,MATSEQAIJ));
  PetscCall(MatSeqAIJSetPreallocation(B,0,NULL));
  PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
  for (i=0; i<m; i++) {
    PetscCall(MatGetRow(A,i+rstart,&nnz,&indx,&values));
    PetscCall(MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES));
    PetscCall(MatRestoreRow(A,i+rstart,&nnz,&indx,&values));
  }
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank));
  PetscCall(PetscStrlen(outfile,&len));
  PetscCall(PetscMalloc1(len+6,&name));
  PetscCall(PetscSNPrintf(name,len+6,"%s.%d",outfile,rank));
  PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out));
  PetscCall(PetscFree(name));
  PetscCall(MatView(B,out));
  PetscCall(PetscViewerDestroy(&out));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
{
  Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;

  PetscFunctionBegin;
  if (!merge) PetscFunctionReturn(0);
  PetscCall(PetscFree(merge->id_r));
  PetscCall(PetscFree(merge->len_s));
  PetscCall(PetscFree(merge->len_r));
  PetscCall(PetscFree(merge->bi));
  PetscCall(PetscFree(merge->bj));
  PetscCall(PetscFree(merge->buf_ri[0]));
  PetscCall(PetscFree(merge->buf_ri));
  PetscCall(PetscFree(merge->buf_rj[0]));
  PetscCall(PetscFree(merge->buf_rj));
  PetscCall(PetscFree(merge->coi));
  PetscCall(PetscFree(merge->coj));
  PetscCall(PetscFree(merge->owners_co));
  PetscCall(PetscLayoutDestroy(&merge->rowmap));
  PetscCall(PetscFree(merge));
  PetscFunctionReturn(0);
}
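/*
   For reference, the merge structure freed above is attached to the parallel matrix
   with the standard PetscContainer pattern; these are the same calls that appear in
   MatCreateMPIAIJSumSeqAIJSymbolic() below:

     PetscContainer container;
     PetscCall(PetscContainerCreate(PETSC_COMM_SELF,&container));
     PetscCall(PetscContainerSetPointer(container,merge));
     PetscCall(PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI));
     PetscCall(PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container));
     PetscCall(PetscContainerDestroy(&container));
*/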
#include <../src/mat/utils/freespace.h>
#include <petscbt.h>

PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
{
  MPI_Comm            comm;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N = mpimat->cmap->N,i,j,*owners,*ai = a->i,*aj;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  const MatScalar     *aa,*a_a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mpimat,&comm));
  PetscCall(PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0));

  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));

  PetscCall(PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container));
  PetscCheck(container,PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  PetscCall(PetscContainerGetPointer(container,(void**)&merge));
  PetscCall(MatSeqAIJGetArrayRead(seqmat,&a_a));
  aa = a_a;

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  PetscCall(PetscMalloc1(size,&status));
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  PetscCall(PetscObjectGetNewTag((PetscObject)mpimat,&taga));
  PetscCall(PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits));

  PetscCall(PetscMalloc1(merge->nsend+1,&s_waits));
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i = owners[proc];
    PetscCallMPI(MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k));
    k++;
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,r_waits,status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,s_waits,status));
  PetscCall(PetscFree(status));

  PetscCall(PetscFree(s_waits));
  PetscCall(PetscFree(r_waits));

  /* insert mat values of mpimat */
  PetscCall(PetscMalloc1(N,&ba_i));
  PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai));

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k];                 /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;             /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    PetscCall(PetscArrayzero(ba_i,bnzi));

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    PetscCall(MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(seqmat,&a_a));
  PetscCall(MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(abuf_r[0]));
  PetscCall(PetscFree(abuf_r));
  PetscCall(PetscFree(ba_i));
  PetscCall(PetscFree3(buf_ri_k,nextrow,nextai));
  PetscCall(PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0));
  PetscFunctionReturn(0);
}
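/*
   The numeric routine above assumes the symbolic phase has already been run; the
   expected calling sequence (used by MatCreateMPIAIJSumSeqAIJ() further below) is:

     PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,&mpimat));   once
     PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat,mpimat));              whenever values change
*/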
PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  Mat                 B_mpi;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M = seqmat->rmap->n,N = seqmat->cmap->n,i,*owners,*ai = a->i,*aj = a->j;
  PetscInt            len,proc,*dnz,*onz,bs,cbs;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble = 0;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space = NULL,current_space = NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0));

  /* make sure it is a PETSc comm */
  PetscCall(PetscCommDuplicate(comm,&comm,NULL));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));

  PetscCall(PetscNew(&merge));
  PetscCall(PetscMalloc1(size,&status));

  /* determine row ownership */
  PetscCall(PetscLayoutCreate(comm,&merge->rowmap));
  PetscCall(PetscLayoutSetLocalSize(merge->rowmap,m));
  PetscCall(PetscLayoutSetSize(merge->rowmap,M));
  PetscCall(PetscLayoutSetBlockSize(merge->rowmap,1));
  PetscCall(PetscLayoutSetUp(merge->rowmap));
  PetscCall(PetscMalloc1(size,&len_si));
  PetscCall(PetscMalloc1(size,&merge->len_s));

  m      = merge->rowmap->n;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  len_s = merge->len_s;

  len          = 0; /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++) {
    len_si[proc] = 0;
    if (proc == rank) {
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      for (i=owners[proc]; i<owners[proc+1]; i++) {
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len         += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  PetscCall(PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv));
  PetscCall(PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri));

  /* post the Irecv of j-structure */
  PetscCall(PetscCommGetNewTag(comm,&tagj));
  PetscCall(PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits));

  /* post the Isend of j-structure */
  PetscCall(PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits));

  for (proc=0, k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i = owners[proc];
    PetscCallMPI(MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k));
    k++;
  }

  /* receives and sends of j-structure are complete */
  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,rj_waits,status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,sj_waits,status));

  /* send and recv i-structure */
  PetscCall(PetscCommGetNewTag(comm,&tagi));
  PetscCall(PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits));

  PetscCall(PetscMalloc1(len+1,&buf_s));
  buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++) {
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc];         /* local row index */
        nrows++;
      }
    }
    PetscCallMPI(MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k));
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,ri_waits,status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,si_waits,status));

  PetscCall(PetscInfo(seqmat,"nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv));
  for (i=0; i<merge->nrecv; i++) {
    PetscCall(PetscInfo(seqmat,"recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));
  }

  PetscCall(PetscFree(len_si));
  PetscCall(PetscFree(len_ri));
  PetscCall(PetscFree(rj_waits));
  PetscCall(PetscFree2(si_waits,sj_waits));
  PetscCall(PetscFree(ri_waits));
  PetscCall(PetscFree(buf_s));
  PetscCall(PetscFree(status));

  /* compute a local seq matrix in each processor */
  /* allocate bi array and free space for accumulating nonzero column info */
  PetscCall(PetscMalloc1(m+1,&bi));
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  PetscCall(PetscLLCreate(N,N,nlnk,lnk,lnkbt));

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len = ai[owners[rank+1]] - ai[owners[rank]];
  PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space));

  current_space = free_space;

  /* determine symbolic info for each local row */
  PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai));

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k];                 /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  MatPreallocateBegin(comm,m,n,dnz,onz);
  len = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt));
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt));
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */
    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space));
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    PetscCall(PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt));
    PetscCall(MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz));

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  PetscCall(PetscFree3(buf_ri_k,nextrow,nextai));

  PetscCall(PetscMalloc1(bi[m]+1,&bj));
  PetscCall(PetscFreeSpaceContiguous(&free_space,bj));
  PetscCall(PetscLLDestroy(lnk,lnkbt));

  /* create symbolic parallel matrix B_mpi */
  PetscCall(MatGetBlockSizes(seqmat,&bs,&cbs));
  PetscCall(MatCreate(comm,&B_mpi));
  if (n==PETSC_DECIDE) {
    PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N));
  } else {
    PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
  }
  PetscCall(MatSetBlockSizes(B_mpi,bs,cbs));
  PetscCall(MatSetType(B_mpi,MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz));
  MatPreallocateEnd(dnz,onz);
  PetscCall(MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE));

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled = PETSC_FALSE;
  merge->bi        = bi;
  merge->bj        = bj;
  merge->buf_ri    = buf_ri;
  merge->buf_rj    = buf_rj;
  merge->coi       = NULL;
  merge->coj       = NULL;
  merge->owners_co = NULL;

  PetscCall(PetscCommDestroy(&comm));

  /* attach the supporting struct to B_mpi for reuse */
  PetscCall(PetscContainerCreate(PETSC_COMM_SELF,&container));
  PetscCall(PetscContainerSetPointer(container,merge));
  PetscCall(PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI));
  PetscCall(PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container));
  PetscCall(PetscContainerDestroy(&container));
  *mpimat = B_mpi;

  PetscCall(PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0));
  PetscFunctionReturn(0);
}

/*@C
   MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
   matrices from each processor

   Collective

   Input Parameters:
+  comm - the communicator the parallel matrix will live on
.  seqmat - the input sequential matrix
.  m - number of local rows (or PETSC_DECIDE)
.  n - number of local columns (or PETSC_DECIDE)
-  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.  mpimat - the parallel matrix generated

   Level: advanced

   Notes:
   The dimensions of the sequential matrix in each processor MUST be the same.
   The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
   destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm,&size));
  if (size == 1) {
    PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0));
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat));
    } else {
      PetscCall(MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN));
    }
    PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0));
    PetscFunctionReturn(0);
  }
  PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0));
  if (scall == MAT_INITIAL_MATRIX) {
    PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat));
  }
  PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat));
  PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0));
  PetscFunctionReturn(0);
}

/*@
   MatAIJGetLocalMat - Creates a SeqAIJ matrix from a MATAIJ matrix by taking all its local rows and putting them into a sequential matrix with
   mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
   with MatGetSize()

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  A_loc - the local sequential matrix generated

   Level: developer

   Notes:
   In other words, this combines the two parts of a parallel MPIAIJ matrix on each process into a single matrix.

   Destroy the matrix with MatDestroy().

.seealso: `MatMPIAIJGetLocalMat()`

@*/
PetscErrorCode MatAIJGetLocalMat(Mat A,Mat *A_loc)
{
  PetscBool mpi;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&mpi));
  if (mpi) {
    PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,A_loc));
  } else {
    *A_loc = A;
    PetscCall(PetscObjectReference((PetscObject)*A_loc));
  }
  PetscFunctionReturn(0);
}
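/*
   A minimal usage sketch for MatCreateMPIAIJSumSeqAIJ() (hypothetical setup: every
   rank holds a SeqAIJ matrix seqmat of the same global size M x N; overlapping
   entries are added):

     Mat mpimat;
     PetscCall(MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat));
     ... change values in seqmat, keeping the same nonzero pattern ...
     PetscCall(MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat));
     PetscCall(MatDestroy(&mpimat));
*/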
/*@
   MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
   mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
   with MatGetSize()

   Not Collective

   Input Parameters:
+  A - the matrix
-  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.  A_loc - the local sequential matrix generated

   Level: developer

   Notes:
   In other words, this combines the two parts of a parallel MPIAIJ matrix on each process into a single matrix.

   When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
   If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
   This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
   modify the values of the returned A_loc.

.seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ        *mat,*a,*b;
  PetscInt          *ai,*aj,*bi,*bj,*cmap = mpimat->garray;
  const PetscScalar *aa,*ba,*aav,*bav;
  PetscScalar       *ca,*cam;
  PetscMPIInt       size;
  PetscInt          am = A->rmap->n,i,j,k,cstart = A->cmap->rstart;
  PetscInt          *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool         match;

  PetscFunctionBegin;
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match));
  PetscCheck(match,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
  if (size == 1) {
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0));
  a  = (Mat_SeqAIJ*)(mpimat->A)->data;
  b  = (Mat_SeqAIJ*)(mpimat->B)->data;
  ai = a->i; aj = a->j; bi = b->i; bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A,&aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B,&bav));
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc1(1+am,&ci));
    ci[0] = 0;
    for (i=0; i<am; i++) {
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    PetscCall(PetscMalloc1(1+ci[am],&cj));
    PetscCall(PetscMalloc1(1+ci[am],&ca));
    k = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A with global columns below the diagonal block */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A with global columns above the diagonal block */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&cam));
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A below the diagonal block */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A above the diagonal block */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&cam));
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A,&aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B,&bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0));
  PetscFunctionReturn(0);
}
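/*
   A minimal sketch of the MAT_INITIAL_MATRIX/MAT_REUSE_MATRIX pattern described in
   the Notes above (assumes an assembled MATMPIAIJ matrix A):

     Mat A_loc;
     PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc));
     ... values of A change, nonzero pattern unchanged ...
     PetscCall(MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc));
     PetscCall(MatDestroy(&A_loc));
*/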
/*@
   MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
   mlocal rows and n columns, where n is the sum of the number of columns of the diagonal and off-diagonal parts

   Not Collective

   Input Parameters:
+  A - the matrix
-  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameters:
+  glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL)
-  A_loc - the local sequential matrix generated

   Level: developer

   Notes:
   This is different from MatMPIAIJGetLocalMat() since the first columns in the returned matrix are those associated with the diagonal part, followed by those associated with the off-diagonal part (in its local ordering).

.seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`

@*/
PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc)
{
  Mat            Ao,Ad;
  const PetscInt *cmap;
  PetscMPIInt    size;
  PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*);

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
  if (size == 1) {
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)Ad));
      *A_loc = Ad;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN));
    }
    if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob));
    PetscFunctionReturn(0);
  }
  PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f));
  PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0));
  if (f) {
    PetscCall((*f)(A,scall,glob,A_loc));
  } else {
    Mat_SeqAIJ        *a = (Mat_SeqAIJ*)Ad->data;
    Mat_SeqAIJ        *b = (Mat_SeqAIJ*)Ao->data;
    Mat_SeqAIJ        *c;
    PetscInt          *ai = a->i, *aj = a->j;
    PetscInt          *bi = b->i, *bj = b->j;
    PetscInt          *ci,*cj;
    const PetscScalar *aa,*ba;
    PetscScalar       *ca;
    PetscInt          i,j,am,dn,on;

    PetscCall(MatGetLocalSize(Ad,&am,&dn));
    PetscCall(MatGetLocalSize(Ao,NULL,&on));
    PetscCall(MatSeqAIJGetArrayRead(Ad,&aa));
    PetscCall(MatSeqAIJGetArrayRead(Ao,&ba));
    if (scall == MAT_INITIAL_MATRIX) {
      PetscInt k;
      PetscCall(PetscMalloc1(1+am,&ci));
      PetscCall(PetscMalloc1(ai[am]+bi[am],&cj));
      PetscCall(PetscMalloc1(ai[am]+bi[am],&ca));
      ci[0] = 0;
      for (i=0,k=0; i<am; i++) {
        const PetscInt ncols_o = bi[i+1] - bi[i];
        const PetscInt ncols_d = ai[i+1] - ai[i];
        ci[i+1] = ci[i] + ncols_o + ncols_d;
        /* diagonal portion of A */
        for (j=0; j<ncols_d; j++,k++) {
          cj[k] = *aj++;
          ca[k] = *aa++;
        }
        /* off-diagonal portion of A */
        for (j=0; j<ncols_o; j++,k++) {
          cj[k] = dn + *bj++;
          ca[k] = *ba++;
        }
      }
      /* put together the new matrix */
      PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc));
      /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
      /* Since these are PETSc arrays, change flags to free them as necessary. */
      c          = (Mat_SeqAIJ*)(*A_loc)->data;
      c->free_a  = PETSC_TRUE;
      c->free_ij = PETSC_TRUE;
      c->nonew   = 0;
      PetscCall(MatSetType(*A_loc,((PetscObject)Ad)->type_name));
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&ca));
      for (i=0; i<am; i++) {
        const PetscInt ncols_d = ai[i+1] - ai[i];
        const PetscInt ncols_o = bi[i+1] - bi[i];
        /* diagonal portion of A */
        for (j=0; j<ncols_d; j++) *ca++ = *aa++;
        /* off-diagonal portion of A */
        for (j=0; j<ncols_o; j++) *ca++ = *ba++;
      }
      PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&ca));
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
    PetscCall(MatSeqAIJRestoreArrayRead(Ad,&aa));
    PetscCall(MatSeqAIJRestoreArrayRead(Ao,&ba));
    if (glob) {
      PetscInt cst,*gidx;

      PetscCall(MatGetOwnershipRangeColumn(A,&cst,NULL));
      PetscCall(PetscMalloc1(dn+on,&gidx));
      for (i=0; i<dn; i++) gidx[i]    = cst + i;
      for (i=0; i<on; i++) gidx[i+dn] = cmap[i];
      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob));
    }
  }
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0));
  PetscFunctionReturn(0);
}
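/*
   A minimal sketch (assumes an assembled MATMPIAIJ matrix A): retrieve the merged
   local matrix together with the IS mapping its columns (diagonal part first, then
   off-diagonal part) back to global columns:

     Mat A_loc;
     IS  glob;
     PetscCall(MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&A_loc));
     ... entry j of glob is the global column of local column j of A_loc ...
     PetscCall(ISDestroy(&glob));
     PetscCall(MatDestroy(&A_loc));
*/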
/*@C
   MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns

   Not Collective

   Input Parameters:
+  A - the matrix
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  row, col - index sets of rows and columns to extract (or NULL)

   Output Parameter:
.  A_loc - the local sequential matrix generated

   Level: developer

.seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`

@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  PetscInt   i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS         isrowa,iscola;
  Mat        *aloc;
  PetscBool  match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match));
  PetscCheck(match,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0));
  if (!row) {
    start = A->rmap->rstart; end = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa));
  } else {
    isrowa = *row;
  }
  if (!col) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA+nzB,&idx));
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola));
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc1(1,&aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc));
  if (!col) { /* attach global id of condensed columns */
    PetscCall(PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola));
  }
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  if (!row) {
    PetscCall(ISDestroy(&isrowa));
  }
  if (!col) {
    PetscCall(ISDestroy(&iscola));
  }
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0));
  PetscFunctionReturn(0);
}
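/*
   A minimal sketch (assumes an assembled MATMPIAIJ matrix A): passing NULL for row
   and col lets the routine build the default index sets, i.e., all local rows and
   the nonzero columns:

     Mat A_loc;
     PetscCall(MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc));
     PetscCall(MatDestroy(&A_loc));
*/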
/*
   Create a sequential AIJ matrix based on row indices: a whole row is extracted once it is matched.
   Rows could be local or remote. The routine is designed to be scalable in memory, so that nothing is based
   on a global size.
*/
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
{
  Mat_MPIAIJ             *p = (Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ             *pd = (Mat_SeqAIJ*)(p->A)->data,*po = (Mat_SeqAIJ*)(p->B)->data,*p_oth;
  PetscInt               plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
  PetscMPIInt            owner;
  PetscSFNode            *iremote,*oiremote;
  const PetscInt         *lrowindices;
  PetscSF                sf,osf;
  PetscInt               pcstart,*roffsets,*loffsets,*pnnz,j;
  PetscInt               ontotalcols,dntotalcols,ntotalcols,nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;
  const PetscScalar      *pd_a,*po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P,&comm));
  /* plocalsize is the number of roots
     nrows is the number of leaves */
  PetscCall(MatGetLocalSize(P,&plocalsize,NULL));
  PetscCall(ISGetLocalSize(rows,&nrows));
  PetscCall(PetscCalloc1(nrows,&iremote));
  PetscCall(ISGetIndices(rows,&lrowindices));
  for (i=0; i<nrows; i++) {
    /* Find a remote index and an owner for a row
       The row could be local or remote */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create an SF to communicate the number of nonzero columns for each row */
  PetscCall(PetscSFCreate(comm,&sf));
  /* SF will figure out the number of nonzero columns for each row, and their offsets */
  PetscCall(PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscCalloc1(2*(plocalsize+1),&roffsets));
  PetscCall(PetscCalloc1(2*plocalsize,&nrcols));
  PetscCall(PetscCalloc1(nrows,&pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i=0; i<plocalsize; i++) {
    /* diagonal */
    nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
    /* off-diagonal */
    nrcols[i*2+1] = po->i[i+1] - po->i[i];
    /* compute offsets so that we know the relative location of each row */
    roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
    roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
  }
  PetscCall(PetscCalloc1(2*nrows,&nlcols));
  PetscCall(PetscCalloc1(2*nrows,&loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  for (i=0; i<nrows; i++) {
    pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
    ncol    = PetscMax(pnnz[i],ncol);
    /* diagonal */
    dntotalcols += nlcols[i*2+0];
    /* off-diagonal */
    ontotalcols += nlcols[i*2+1];
  }
  /* We do not need to figure out the right number of columns
     since all the calculations will be done by going through the raw data */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ*)(*P_oth)->data;
  /* diagonal */
  PetscCall(PetscCalloc1(dntotalcols,&iremote));
  /* off-diagonal */
  PetscCall(PetscCalloc1(ontotalcols,&oiremote));
  /* diagonal */
  PetscCall(PetscCalloc1(dntotalcols,&ilocal));
  /* off-diagonal */
  PetscCall(PetscCalloc1(ontotalcols,&oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i=0; i<nrows; i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL));
    /* Set iremote for the diagonal matrix */
    for (j=0; j<nlcols[i*2+0]; j++) {
      iremote[dntotalcols].index = loffsets[i*2+0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is SeqAIJ, so ilocal needs to point to the first part of the memory */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off-diagonal */
    for (j=0; j<nlcols[i*2+1]; j++) {
      oiremote[ontotalcols].index = loffsets[i*2+1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++] = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows,&lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm,&sf));
  /* P serves as roots and P_oth is leaves
     Diagonal matrix */
  PetscCall(PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm,&osf));
  /* Off-diagonal matrix */
  PetscCall(PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a));
  /* We operate on the matrix internal data to save memory */
  PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P,&pcstart,NULL));
  /* Convert to global indices for the diagonal matrix */
  for (i=0; i<pd->i[plocalsize]; i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE));
  /* We want P_oth to store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping));
  /* Use a memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH));
  PetscCall(ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j));
  PetscCall(PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE));
  /* Convert back to local indices */
  for (i=0; i<pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE));
  nout = 0;
  PetscCall(ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j));
  PetscCheck(nout == po->i[plocalsize],comm,PETSC_ERR_ARG_INCOMP,"n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ",po->i[plocalsize],nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a));
  /* Stop PETSc from shrinking the memory */
  for (i=0; i<nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY));
  /* Attach the PetscSF objects to P_oth so that we can reuse them later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(0);
}
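/*
   The routine above is built on the generic PetscSF broadcast pattern (roots on the
   owner of P, leaves on the requesting process); a stripped-down sketch, with
   hypothetical nroots/nleaves/ilocal/iremote/rootdata/leafdata arrays:

     PetscSF sf;
     PetscCall(PetscSFCreate(comm,&sf));
     PetscCall(PetscSFSetGraph(sf,nroots,nleaves,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
     PetscCall(PetscSFSetUp(sf));
     PetscCall(PetscSFBcastBegin(sf,MPIU_INT,rootdata,leafdata,MPI_REPLACE));
     PetscCall(PetscSFBcastEnd(sf,MPIU_INT,rootdata,leafdata,MPI_REPLACE));
     PetscCall(PetscSFDestroy(&sf));
*/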
/*
   Creates a SeqAIJ matrix by taking the rows of P that correspond to nonzero columns of the local A.
   This supports MPIAIJ and MAIJ.
*/
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data,*p = (Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ *p_oth;
  IS         rows,map;
  PetscHMapI hamp;
  PetscInt   i,htsize,*rowindices,off,*mapping,key,count;
  MPI_Comm   comm;
  PetscSF    sf,osf;
  PetscBool  has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0));
  /* If it is the first time, create an index set of off-diagonal nonzero columns of A,
     and then create a submatrix (that often is an overlapping matrix) */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    PetscCall(PetscHMapICreate(&hamp));
    PetscCall(PetscHMapIResize(hamp,a->B->cmap->n));
    PetscCall(PetscCalloc1(a->B->cmap->n,&mapping));
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense */
    for (i=0; i<a->B->cmap->n; i++) {
      key = a->garray[i]/dof;
      PetscCall(PetscHMapIHas(hamp,key,&has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp,key,count++));
      } else {
        /* Current 'i' has the same key as the previous entry */
        mapping[i] = count-1;
      }
    }
    PetscCall(ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map));
    PetscCall(PetscHMapIGetSize(hamp,&htsize));
    PetscCheck(htsize==count,comm,PETSC_ERR_ARG_INCOMP," Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ",htsize,count);
    PetscCall(PetscCalloc1(htsize,&rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp,&off,rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    PetscCall(PetscSortInt(htsize,rowindices));
    PetscCall(ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows));
    /* In case the matrix was already created, but the user wants to recreate it */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth));
    PetscCall(PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If the matrix was already created, we simply update values using the SF objects
       that were attached to the matrix earlier */
    const PetscScalar *pd_a,*po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf));
    PetscCheck(sf && osf,comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ*)(*P_oth)->data;
    /* Update values in place */
    PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a));
    PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a));
  } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0));
  PetscFunctionReturn(0);
}

/*@C
   MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to nonzero columns of the local A

   Collective on Mat

   Input Parameters:
+  A - the first matrix in mpiaij format
.  B - the second matrix in mpiaij format
-  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameters:
+  rowb - On input index sets of rows of B to extract (or NULL), modified on output
.  colb - On input index sets of columns of B to extract (or NULL), modified on output
-  B_seq - the sequential matrix generated

   Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  PetscInt   *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS         isrowb,iscolb;
  Mat        *bseq = NULL;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0));

  if (scall == MAT_INITIAL_MATRIX) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA+nzB,&idx));
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb));
    PetscCall(ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb));
  } else {
    PetscCheck(rowb && colb,PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    PetscCall(PetscMalloc1(1,&bseq));
    bseq[0] = *B_seq;
  }
  PetscCall(MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq));
  *B_seq = bseq[0];
  PetscCall(PetscFree(bseq));
  if (!rowb) {
    PetscCall(ISDestroy(&isrowb));
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    PetscCall(ISDestroy(&iscolb));
  } else {
    *colb = iscolb;
  }
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0));
  PetscFunctionReturn(0);
}
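/*
   A minimal sketch of the reuse pattern for MatGetBrowsOfAcols() (assumes
   compatible MATMPIAIJ matrices A and B): the first call builds B_seq and the index
   sets; the reuse call must pass the saved rowb/colb back in:

     IS  rowb = NULL,colb = NULL;
     Mat B_seq = NULL;
     PetscCall(MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq));
     ... values of B change, same nonzero pattern ...
     PetscCall(MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq));
     PetscCall(ISDestroy(&rowb));
     PetscCall(ISDestroy(&colb));
     PetscCall(MatDestroy(&B_seq));
*/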
/*
   MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to nonzero columns
   of the OFF-DIAGONAL portion of the local A

   Collective on Mat

   Input Parameters:
+  A,B - the matrices in mpiaij format
-  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameters:
+  startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.  startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.  bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
-  B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

   Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
   for this matrix. This is not desirable.

   Level: developer

*/
PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
{
  Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ        *b_oth;
  VecScatter        ctx;
  MPI_Comm          comm;
  const PetscMPIInt *rprocs,*sprocs;
  const PetscInt    *srow,*rstarts,*sstarts;
  PetscInt          *rowlen,*bufj,*bufJ,ncols = 0,aBn = a->B->cmap->n,row,*b_othi,*b_othj,*rvalues = NULL,*svalues = NULL,*cols,sbs,rbs;
  PetscInt          i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
  PetscScalar       *b_otha,*bufa,*bufA,*vals = NULL;
  MPI_Request       *reqs = NULL,*rwaits = NULL,*swaits = NULL;
  PetscMPIInt       size,tag,rank,nreqs;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCallMPI(MPI_Comm_size(comm,&size));

  if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));

  if (size == 1) {
    startsj_s = NULL;
    bufa_ptr  = NULL;
    *B_oth    = NULL;
    PetscFunctionReturn(0);
  }

  ctx = a->Mvctx;
  tag = ((PetscObject)ctx)->tag;

  PetscCall(VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs));
  /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
  PetscCall(VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs));
  PetscCall(PetscMPIIntCast(nsends+nrecvs,&nreqs));
  PetscCall(PetscMalloc1(nreqs,&reqs));
  rwaits = reqs;
  swaits = reqs + nrecvs;

  if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX) {
    /* i-array */
    /* post receives */
    if (nrecvs) PetscCall(PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues)); /* rstarts can be NULL when nrecvs=0 */
    for (i=0; i<nrecvs; i++) {
      rowlen = rvalues + rstarts[i]*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
      PetscCallMPI(MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i));
    }

    /* pack the outgoing message */
    PetscCall(PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj));

    sstartsj[0] = 0;
    rstartsj[0] = 0;
    len         = 0; /* total length of j or a array to be sent */
    if (nsends) {
      k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
      PetscCall(PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues));
    }
    for (i=0; i<nsends; i++) {
      rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
      nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l=0; l<sbs; l++) {
          PetscCall(MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); /* rowlength */

          rowlen[j*sbs+l] = ncols;

          len += ncols;
          PetscCall(MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL));
        }
        k++;
      }
      PetscCallMPI(MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i));

      sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
    PetscCall(PetscFree(svalues));

    /* allocate buffers for sending j and a arrays */
    PetscCall(PetscMalloc1(len+1,&bufj));
    PetscCall(PetscMalloc1(len+1,&bufa));

    /* create i-array of B_oth */
    PetscCall(PetscMalloc1(aBn+2,&b_othi));

    b_othi[0] = 0;
    len       = 0; /* total length of j or a array to be received */
    k         = 0;
    for (i=0; i<nrecvs; i++) {
      rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        PetscCall(PetscIntSumError(rowlen[j],len,&len));
        k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }
    PetscCall(PetscFree(rvalues));

    /* allocate space for j and a arrays of B_oth */
    PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_othj));
    PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_otha));

    /* j-array */
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++) {
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      PetscCallMPI(MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i));
    }

    /* pack the outgoing message j-array */
    if (nsends) k = sstarts[0];
    for (i=0; i<nsends; i++) {
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++) {
          PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL));
          for (l=0; l<ncols; l++) {
            *bufJ++ = cols[l];
          }
          PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL));
        }
      }
      PetscCallMPI(MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i));
    }

    /* recvs and sends of j-array are completed */
    if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
  } else if (scall == MAT_REUSE_MATRIX) {
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
PetscCall(MatSeqAIJGetArrayWrite(*B_oth,&b_otha));
5894   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Unsupported MatReuse value; must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5895
5896   /* a-array */
5897   /*---------*/
5898   /* post receives of a-array */
5899   for (i=0; i<nrecvs; i++) {
5900     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5901     PetscCallMPI(MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i));
5902   }
5903
5904   /* pack the outgoing message a-array */
5905   if (nsends) k = sstarts[0];
5906   for (i=0; i<nsends; i++) {
5907     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5908     bufA  = bufa+sstartsj[i];
5909     for (j=0; j<nrows; j++) {
5910       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5911       for (ll=0; ll<sbs; ll++) {
5912         PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals));
5913         for (l=0; l<ncols; l++) {
5914           *bufA++ = vals[l];
5915         }
5916         PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals));
5917       }
5918     }
5919     PetscCallMPI(MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i));
5920   }
5921   /* recvs and sends of a-array are completed */
5922   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
5923   PetscCall(PetscFree(reqs));
5924
5925   if (scall == MAT_INITIAL_MATRIX) {
5926     /* put together the new matrix */
5927     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth));
5928
5929     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5930     /* Since these are PETSc arrays, change flags to free them as necessary. */
5931     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5932     b_oth->free_a  = PETSC_TRUE;
5933     b_oth->free_ij = PETSC_TRUE;
5934     b_oth->nonew   = 0;
5935
5936     PetscCall(PetscFree(bufj));
5937     if (!startsj_s || !bufa_ptr) {
5938       PetscCall(PetscFree2(sstartsj,rstartsj));
5939       PetscCall(PetscFree(bufa)); /* the caller did not ask to keep the send buffer, so free it here rather than leak it */
5940     } else {
5941       *startsj_s = sstartsj;
5942       *startsj_r = rstartsj;
5943       *bufa_ptr  = bufa;
5944     }
5945   } else if (scall == MAT_REUSE_MATRIX) {
5946     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth,&b_otha));
5947   }
5948
5949   PetscCall(VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs));
5950   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs));
5951   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0));
5952   PetscFunctionReturn(0);
5953 }
5954
5955 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5956 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5957 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5958 #if defined(PETSC_HAVE_MKL_SPARSE)
5959 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5960 #endif
5961 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5962 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5963 #if defined(PETSC_HAVE_ELEMENTAL)
5964 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5965 #endif
5966 #if defined(PETSC_HAVE_SCALAPACK)
5967 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
5968 #endif
5969 #if defined(PETSC_HAVE_HYPRE)
5970 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5971 #endif
5972 #if defined(PETSC_HAVE_CUDA)
5973 PETSC_INTERN PetscErrorCode
MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*); 5974 #endif 5975 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 5976 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*); 5977 #endif 5978 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5979 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5980 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 5981 5982 /* 5983 Computes (B'*A')' since computing B*A directly is untenable 5984 5985 n p p 5986 [ ] [ ] [ ] 5987 m [ A ] * n [ B ] = m [ C ] 5988 [ ] [ ] [ ] 5989 5990 */ 5991 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5992 { 5993 Mat At,Bt,Ct; 5994 5995 PetscFunctionBegin; 5996 PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&At)); 5997 PetscCall(MatTranspose(B,MAT_INITIAL_MATRIX,&Bt)); 5998 PetscCall(MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct)); 5999 PetscCall(MatDestroy(&At)); 6000 PetscCall(MatDestroy(&Bt)); 6001 PetscCall(MatTranspose(Ct,MAT_REUSE_MATRIX,&C)); 6002 PetscCall(MatDestroy(&Ct)); 6003 PetscFunctionReturn(0); 6004 } 6005 6006 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C) 6007 { 6008 PetscBool cisdense; 6009 6010 PetscFunctionBegin; 6011 PetscCheck(A->cmap->n == B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT,A->cmap->n,B->rmap->n); 6012 PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N)); 6013 PetscCall(MatSetBlockSizesFromMats(C,A,B)); 6014 PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"")); 6015 if (!cisdense) { 6016 PetscCall(MatSetType(C,((PetscObject)A)->type_name)); 6017 } 6018 PetscCall(MatSetUp(C)); 6019 6020 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6021 PetscFunctionReturn(0); 6022 } 6023 6024 /* ----------------------------------------------------------------*/ 6025 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6026 { 6027 Mat_Product *product = C->product; 6028 Mat A = product->A,B=product->B; 6029 6030 PetscFunctionBegin; 6031 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) 6032 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 6033 6034 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6035 C->ops->productsymbolic = MatProductSymbolic_AB; 6036 PetscFunctionReturn(0); 6037 } 6038 6039 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6040 { 6041 Mat_Product *product = C->product; 6042 6043 PetscFunctionBegin; 6044 if (product->type == MATPRODUCT_AB) { 6045 PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 6046 } 6047 PetscFunctionReturn(0); 6048 } 6049 6050 /* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 6051 6052 Input Parameters: 6053 6054 j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1) 6055 j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2) 6056 6057 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6058 6059 For Set1, j1[] contains column indices of the nonzeros. 
6060   For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
6061   respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6062   but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6063
6064   Similar for Set2.
6065
6066   This routine merges the two sets of nonzeros row by row and removes repeats.
6067
6068   Output Parameters: (memory is allocated by the caller)
6069
6070   i[],j[]: the CSR of the merged matrix, which has m rows.
6071   imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to the imap1[k]-th unique nonzero in the merged matrix.
6072   imap2[]: similar to imap1[], but for Set2.
6073   Note we order nonzeros row-by-row and from left to right.
6074 */
6075 static PetscErrorCode MatMergeEntries_Internal(Mat mat,const PetscInt j1[],const PetscInt j2[],const PetscCount rowBegin1[],const PetscCount rowEnd1[],
6076                                                const PetscCount rowBegin2[],const PetscCount rowEnd2[],const PetscCount jmap1[],const PetscCount jmap2[],
6077                                                PetscCount imap1[],PetscCount imap2[],PetscInt i[],PetscInt j[])
6078 {
6079   PetscInt   r,m; /* Row index of mat */
6080   PetscCount t,t1,t2,b1,e1,b2,e2;
6081
6082   PetscFunctionBegin;
6083   PetscCall(MatGetLocalSize(mat,&m,NULL));
6084   t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged set, respectively */
6085   i[0] = 0;
6086   for (r=0; r<m; r++) { /* Do row by row merging */
6087     b1 = rowBegin1[r];
6088     e1 = rowEnd1[r];
6089     b2 = rowBegin2[r];
6090     e2 = rowEnd2[r];
6091     while (b1 < e1 && b2 < e2) {
6092       if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
6093         j[t]      = j1[b1];
6094         imap1[t1] = t;
6095         imap2[t2] = t;
6096         b1 += jmap1[t1+1] - jmap1[t1]; /* Jump to the next unique nonzero of Set1 */
6097         b2 += jmap2[t2+1] - jmap2[t2]; /* Jump to the next unique nonzero of Set2 */
6098         t1++; t2++; t++;
6099       } else if (j1[b1] < j2[b2]) {
6100         j[t]      = j1[b1];
6101         imap1[t1] = t;
6102         b1 += jmap1[t1+1] - jmap1[t1];
6103         t1++; t++;
6104       } else {
6105         j[t]      = j2[b2];
6106         imap2[t2] = t;
6107         b2 += jmap2[t2+1] - jmap2[t2];
6108         t2++; t++;
6109       }
6110     }
6111     /* Merge the remaining in either j1[] or j2[] */
6112     while (b1 < e1) {
6113       j[t]      = j1[b1];
6114       imap1[t1] = t;
6115       b1 += jmap1[t1+1] - jmap1[t1];
6116       t1++; t++;
6117     }
6118     while (b2 < e2) {
6119       j[t]      = j2[b2];
6120       imap2[t2] = t;
6121       b2 += jmap2[t2+1] - jmap2[t2];
6122       t2++; t++;
6123     }
6124     i[r+1] = t;
6125   }
6126   PetscFunctionReturn(0);
6127 }
6128
6129 /* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6130
6131   Input Parameters:
6132   mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6133   n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6134   respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6135
6136   i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6137   i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6138
6139   Output Parameters:
6140   j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6141   rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6142   They contain indices pointing to j[].
For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6143   and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6144
6145   Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6146   Atot: number of entries belonging to the diagonal block.
6147   Annz: number of unique nonzeros belonging to the diagonal block.
6148   Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, which counts
6149   repeats (i.e., the same 'i,j' pair may appear multiple times).
6150   Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6151   is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6152
6156   Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6157
6158   Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6159 */
6160 static PetscErrorCode MatSplitEntries_Internal(Mat mat,PetscCount n,const PetscInt i[],PetscInt j[],
6161                                                PetscCount perm[],PetscCount rowBegin[],PetscCount rowMid[],PetscCount rowEnd[],
6162                                                PetscCount *Atot_,PetscCount **Aperm_,PetscCount *Annz_,PetscCount **Ajmap_,
6163                                                PetscCount *Btot_,PetscCount **Bperm_,PetscCount *Bnnz_,PetscCount **Bjmap_)
6164 {
6165   PetscInt   cstart,cend,rstart,rend,row,col;
6166   PetscCount Atot=0,Btot=0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
6167   PetscCount Annz=0,Bnnz=0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
6168   PetscCount k,m,p,q,r,s,mid;
6169   PetscCount *Aperm,*Bperm,*Ajmap,*Bjmap;
6170
6171   PetscFunctionBegin;
6172   PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend));
6173   PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend));
6174   m = rend - rstart;
6175
6176   for (k=0; k<n; k++) {if (i[k]>=0) break;} /* Skip negative rows */
6177
6178   /* Process [k,n): sort and partition each local row into diag and offdiag portions,
6179      fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
6180   */
6181   while (k<n) {
6182     row = i[k];
6183     /* Entries in [k,s) are in one row.
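       The loop below first finds s, the first index whose row index differs from i[k], so that [k,s) covers exactly one row.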
Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
6184     for (s=k; s<n; s++) if (i[s] != row) break;
6185     for (p=k; p<s; p++) {
6186       if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */
6187       else PetscAssert((j[p] >= 0) && (j[p] < mat->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index %" PetscInt_FMT " is out of range",j[p]);
6188     }
6189     PetscCall(PetscSortIntWithCountArray(s-k,j+k,perm+k));
6190     PetscCall(PetscSortedIntUpperBound(j,k,s,-1,&mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
6191     rowBegin[row-rstart] = k;
6192     rowMid[row-rstart]   = mid;
6193     rowEnd[row-rstart]   = s;
6194
6195     /* Count nonzeros of this diag/offdiag row, which might have repeats */
6196     Atot += mid - k;
6197     Btot += s - mid;
6198
6199     /* Count unique nonzeros of this diag/offdiag row */
6200     for (p=k; p<mid;) {
6201       col = j[p];
6202       do {j[p] += PETSC_MAX_INT; p++;} while (p<mid && j[p] == col); /* Revert the modified diagonal indices */
6203       Annz++;
6204     }
6205
6206     for (p=mid; p<s;) {
6207       col = j[p];
6208       do {p++;} while (p<s && j[p] == col);
6209       Bnnz++;
6210     }
6211     k = s;
6212   }
6213
6214   /* Allocation according to Atot, Btot, Annz, Bnnz */
6215   PetscCall(PetscMalloc1(Atot,&Aperm));
6216   PetscCall(PetscMalloc1(Btot,&Bperm));
6217   PetscCall(PetscMalloc1(Annz+1,&Ajmap));
6218   PetscCall(PetscMalloc1(Bnnz+1,&Bjmap));
6219
6220   /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6221   Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6222   for (r=0; r<m; r++) {
6223     k   = rowBegin[r];
6224     mid = rowMid[r];
6225     s   = rowEnd[r];
6226     PetscCall(PetscArraycpy(Aperm+Atot,perm+k, mid-k));
6227     PetscCall(PetscArraycpy(Bperm+Btot,perm+mid,s-mid));
6228     Atot += mid - k;
6229     Btot += s - mid;
6230
6231     /* Scan column indices in this row and find out how many repeats each unique nonzero has */
6232     for (p=k; p<mid;) {
6233       col = j[p];
6234       q   = p;
6235       do {p++;} while (p<mid && j[p] == col);
6236       Ajmap[Annz+1] = Ajmap[Annz] + (p - q);
6237       Annz++;
6238     }
6239
6240     for (p=mid; p<s;) {
6241       col = j[p];
6242       q   = p;
6243       do {p++;} while (p<s && j[p] == col);
6244       Bjmap[Bnnz+1] = Bjmap[Bnnz] + (p - q);
6245       Bnnz++;
6246     }
6247   }
6248   /* Output */
6249   *Aperm_ = Aperm;
6250   *Annz_  = Annz;
6251   *Atot_  = Atot;
6252   *Ajmap_ = Ajmap;
6253   *Bperm_ = Bperm;
6254   *Bnnz_  = Bnnz;
6255   *Btot_  = Btot;
6256   *Bjmap_ = Bjmap;
6257   PetscFunctionReturn(0);
6258 }
6259
6260 /* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6261
6262   Input Parameters:
6263   nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6264   nnz: number of unique nonzeros in the merged matrix
6265   imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6266   jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6267
6268   Output Parameter: (memory is allocated by the caller)
6269   jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6270
6271   Example:
6272   nnz1 = 4
6273   nnz  = 6
6274   imap = [1,3,4,5]
6275   jmap = [0,3,5,6,7]
6276   then,
6277   jmap_new = [0,0,3,3,5,6,7]
6278 */
6279 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1,PetscCount nnz,const PetscCount imap[],const PetscCount jmap[],PetscCount jmap_new[])
6280 {
6281   PetscCount k,p;
6282
6283   PetscFunctionBegin;
6284   jmap_new[0] = 0;
6285   p = nnz; /* p loops
over jmap_new[] backwards */ 6286 for (k=nnz1-1; k>=0; k--) { /* k loops over imap[] */ 6287 for (; p > imap[k]; p--) jmap_new[p] = jmap[k+1]; 6288 } 6289 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6290 PetscFunctionReturn(0); 6291 } 6292 6293 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, const PetscInt coo_i[], const PetscInt coo_j[]) 6294 { 6295 MPI_Comm comm; 6296 PetscMPIInt rank,size; 6297 PetscInt m,n,M,N,rstart,rend,cstart,cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6298 PetscCount k,p,q,rem; /* Loop variables over coo arrays */ 6299 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 6300 6301 PetscFunctionBegin; 6302 PetscCall(PetscFree(mpiaij->garray)); 6303 PetscCall(VecDestroy(&mpiaij->lvec)); 6304 #if defined(PETSC_USE_CTABLE) 6305 PetscCall(PetscTableDestroy(&mpiaij->colmap)); 6306 #else 6307 PetscCall(PetscFree(mpiaij->colmap)); 6308 #endif 6309 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6310 mat->assembled = PETSC_FALSE; 6311 mat->was_assembled = PETSC_FALSE; 6312 PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 6313 6314 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 6315 PetscCallMPI(MPI_Comm_size(comm,&size)); 6316 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 6317 PetscCall(PetscLayoutSetUp(mat->rmap)); 6318 PetscCall(PetscLayoutSetUp(mat->cmap)); 6319 PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend)); 6320 PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend)); 6321 PetscCall(MatGetLocalSize(mat,&m,&n)); 6322 PetscCall(MatGetSize(mat,&M,&N)); 6323 6324 /* ---------------------------------------------------------------------------*/ 6325 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6326 /* entries come first, then local rows, then remote rows. */ 6327 /* ---------------------------------------------------------------------------*/ 6328 PetscCount n1 = coo_n,*perm1; 6329 PetscInt *i1,*j1; /* Copies of input COOs along with a permutation array */ 6330 PetscCall(PetscMalloc3(n1,&i1,n1,&j1,n1,&perm1)); 6331 PetscCall(PetscArraycpy(i1,coo_i,n1)); /* Make a copy since we'll modify it */ 6332 PetscCall(PetscArraycpy(j1,coo_j,n1)); 6333 for (k=0; k<n1; k++) perm1[k] = k; 6334 6335 /* Manipulate indices so that entries with negative row or col indices will have smallest 6336 row indices, local entries will have greater but negative row indices, and remote entries 6337 will have positive row indices. 
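     For example, with rstart=10 and rend=20, a local row 12 becomes 12 - PETSC_MAX_INT (negative, but greater than
     PETSC_MIN_INT), an entry with a negative row or column index becomes PETSC_MIN_INT and thus sorts first,
     and a remote row such as 25 keeps its positive value, so a single sort by row groups the three classes in that order.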
6338   */
6339   for (k=0; k<n1; k++) {
6340     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */
6341     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6342     else {
6343       PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_USER_INPUT,"MAT_NO_OFF_PROC_ENTRIES is set but entries for remote rows were provided");
6344       if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6345     }
6346   }
6347
6348   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6349   PetscCall(PetscSortIntWithIntCountArrayPair(n1,i1,j1,perm1));
6350   for (k=0; k<n1; k++) {if (i1[k] > PETSC_MIN_INT) break;} /* Advance k to the first entry we need to take care of */
6351   PetscCall(PetscSortedIntUpperBound(i1,k,n1,rend-1-PETSC_MAX_INT,&rem)); /* rem is upper bound of the last local row */
6352   for (; k<rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows */
6353
6354   /* ---------------------------------------------------------------------------*/
6355   /* Split local rows into diag/offdiag portions                                */
6356   /* ---------------------------------------------------------------------------*/
6357   PetscCount *rowBegin1,*rowMid1,*rowEnd1;
6358   PetscCount *Ajmap1,*Aperm1,*Bjmap1,*Bperm1,*Cperm1;
6359   PetscCount Annz1,Bnnz1,Atot1,Btot1;
6360
6361   PetscCall(PetscCalloc3(m,&rowBegin1,m,&rowMid1,m,&rowEnd1));
6362   PetscCall(PetscMalloc1(n1-rem,&Cperm1));
6363   PetscCall(MatSplitEntries_Internal(mat,rem,i1,j1,perm1,rowBegin1,rowMid1,rowEnd1,&Atot1,&Aperm1,&Annz1,&Ajmap1,&Btot1,&Bperm1,&Bnnz1,&Bjmap1));
6364
6365   /* ---------------------------------------------------------------------------*/
6366   /* Send remote rows to their owner                                            */
6367   /* ---------------------------------------------------------------------------*/
6368   /* Find which rows should be sent to which remote ranks */
6369   PetscInt       nsend = 0; /* Number of MPI ranks to send data to */
6370   PetscMPIInt    *sendto;   /* [nsend], storing remote ranks */
6371   PetscInt       *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6372   const PetscInt *ranges;
6373   PetscInt       maxNsend = size >= 128? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6374
6375   PetscCall(PetscLayoutGetRanges(mat->rmap,&ranges));
6376   PetscCall(PetscMalloc2(maxNsend,&sendto,maxNsend,&nentries));
6377   for (k=rem; k<n1;) {
6378     PetscMPIInt owner;
6379     PetscInt    firstRow,lastRow;
6380
6381     /* Locate a row range */
6382     firstRow = i1[k]; /* first row of this owner */
6383     PetscCall(PetscLayoutFindOwner(mat->rmap,firstRow,&owner));
6384     lastRow = ranges[owner+1]-1; /* last row of this owner */
6385
6386     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6387     PetscCall(PetscSortedIntUpperBound(i1,k,n1,lastRow,&p));
6388
6389     /* All entries in [k,p) belong to this remote owner */
6390     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6391       PetscMPIInt *sendto2;
6392       PetscInt    *nentries2;
6393       PetscInt    maxNsend2 = (maxNsend <= size/2) ? maxNsend*2 : size;
6394
6395       PetscCall(PetscMalloc2(maxNsend2,&sendto2,maxNsend2,&nentries2));
6396       PetscCall(PetscArraycpy(sendto2,sendto,maxNsend));
6397       PetscCall(PetscArraycpy(nentries2,nentries,maxNsend)); /* copy the old counts into the freshly allocated array */
6398       PetscCall(PetscFree2(sendto,nentries)); /* free the old, shorter arrays */
6399       sendto   = sendto2;
6400       nentries = nentries2;
6401       maxNsend = maxNsend2;
6402     }
6403     sendto[nsend] = owner;
6405     PetscCall(PetscCountCast(p-k,&nentries[nsend])); /* checked cast of the entry count to PetscInt */
6406     nsend++;
6407     k = p;
6408   }
6409
6410   /* Build 1st SF to know offsets on remote to send data */
6411   PetscSF     sf1;
6412   PetscInt    nroots = 1,nroots2 = 0;
6413   PetscInt    nleaves = nsend,nleaves2 = 0;
6414   PetscInt    *offsets;
6415   PetscSFNode *iremote;
6416
6417   PetscCall(PetscSFCreate(comm,&sf1));
6418   PetscCall(PetscMalloc1(nsend,&iremote));
6419   PetscCall(PetscMalloc1(nsend,&offsets));
6420   for (k=0; k<nsend; k++) {
6421     iremote[k].rank  = sendto[k];
6422     iremote[k].index = 0;
6423     nleaves2        += nentries[k];
6424     PetscCheck(nleaves2 >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF leaves is too large for PetscInt");
6425   }
6426   PetscCall(PetscSFSetGraph(sf1,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
6427   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1,MPIU_INT,PETSC_MEMTYPE_HOST,&nroots2/*rootdata*/,PETSC_MEMTYPE_HOST,nentries/*leafdata*/,PETSC_MEMTYPE_HOST,offsets/*leafupdate*/,MPI_SUM));
6428   PetscCall(PetscSFFetchAndOpEnd(sf1,MPIU_INT,&nroots2,nentries,offsets,MPI_SUM)); /* If nroots2 overflowed, the offsets[] check below catches it */
6429   PetscCall(PetscSFDestroy(&sf1));
6430   PetscAssert(nleaves2 == n1-rem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "",nleaves2,n1-rem);
6431
6432   /* Build 2nd SF to send remote COOs to their owner */
6433   PetscSF sf2;
6434   nroots  = nroots2;
6435   nleaves = nleaves2;
6436   PetscCall(PetscSFCreate(comm,&sf2));
6437   PetscCall(PetscSFSetFromOptions(sf2));
6438   PetscCall(PetscMalloc1(nleaves,&iremote));
6439   p = 0;
6440   for (k=0; k<nsend; k++) {
6441     PetscCheck(offsets[k] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF roots is too large for PetscInt");
6442     for (q=0; q<nentries[k]; q++,p++) {
6443       iremote[p].rank  = sendto[k];
6444       iremote[p].index = offsets[k] + q;
6445     }
6446   }
6447   PetscCall(PetscSFSetGraph(sf2,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
6448
6449   /* sf2 only sends contiguous leafdata to contiguous rootdata.
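     This is the classic two-SF scheme: sf1's fetch-and-add computed, for each destination rank, where this rank's
     chunk begins in that destination's root buffer, and sf2 now maps every remote entry to its exact remote slot.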
We record the permutation which will be used to fill leafdata */ 6450 PetscCall(PetscArraycpy(Cperm1,perm1+rem,n1-rem)); 6451 6452 /* Send the remote COOs to their owner */ 6453 PetscInt n2 = nroots,*i2,*j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6454 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6455 PetscCall(PetscMalloc3(n2,&i2,n2,&j2,n2,&perm2)); 6456 PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,i1+rem,PETSC_MEMTYPE_HOST,i2,MPI_REPLACE)); 6457 PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,i1+rem,i2,MPI_REPLACE)); 6458 PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,j1+rem,PETSC_MEMTYPE_HOST,j2,MPI_REPLACE)); 6459 PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,j1+rem,j2,MPI_REPLACE)); 6460 6461 PetscCall(PetscFree(offsets)); 6462 PetscCall(PetscFree2(sendto,nentries)); 6463 6464 /* ---------------------------------------------------------------*/ 6465 /* Sort received COOs by row along with the permutation array */ 6466 /* ---------------------------------------------------------------*/ 6467 for (k=0; k<n2; k++) perm2[k] = k; 6468 PetscCall(PetscSortIntWithIntCountArrayPair(n2,i2,j2,perm2)); 6469 6470 /* ---------------------------------------------------------------*/ 6471 /* Split received COOs into diag/offdiag portions */ 6472 /* ---------------------------------------------------------------*/ 6473 PetscCount *rowBegin2,*rowMid2,*rowEnd2; 6474 PetscCount *Ajmap2,*Aperm2,*Bjmap2,*Bperm2; 6475 PetscCount Annz2,Bnnz2,Atot2,Btot2; 6476 6477 PetscCall(PetscCalloc3(m,&rowBegin2,m,&rowMid2,m,&rowEnd2)); 6478 PetscCall(MatSplitEntries_Internal(mat,n2,i2,j2,perm2,rowBegin2,rowMid2,rowEnd2,&Atot2,&Aperm2,&Annz2,&Ajmap2,&Btot2,&Bperm2,&Bnnz2,&Bjmap2)); 6479 6480 /* --------------------------------------------------------------------------*/ 6481 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6482 /* --------------------------------------------------------------------------*/ 6483 PetscInt *Ai,*Bi; 6484 PetscInt *Aj,*Bj; 6485 6486 PetscCall(PetscMalloc1(m+1,&Ai)); 6487 PetscCall(PetscMalloc1(m+1,&Bi)); 6488 PetscCall(PetscMalloc1(Annz1+Annz2,&Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6489 PetscCall(PetscMalloc1(Bnnz1+Bnnz2,&Bj)); 6490 6491 PetscCount *Aimap1,*Bimap1,*Aimap2,*Bimap2; 6492 PetscCall(PetscMalloc1(Annz1,&Aimap1)); 6493 PetscCall(PetscMalloc1(Bnnz1,&Bimap1)); 6494 PetscCall(PetscMalloc1(Annz2,&Aimap2)); 6495 PetscCall(PetscMalloc1(Bnnz2,&Bimap2)); 6496 6497 PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowBegin1,rowMid1,rowBegin2,rowMid2,Ajmap1,Ajmap2,Aimap1,Aimap2,Ai,Aj)); 6498 PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowMid1, rowEnd1,rowMid2, rowEnd2,Bjmap1,Bjmap2,Bimap1,Bimap2,Bi,Bj)); 6499 6500 /* --------------------------------------------------------------------------*/ 6501 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6502 /* expect nonzeros in A/B most likely have local contributing entries */ 6503 /* --------------------------------------------------------------------------*/ 6504 PetscInt Annz = Ai[m]; 6505 PetscInt Bnnz = Bi[m]; 6506 PetscCount *Ajmap1_new,*Bjmap1_new; 6507 6508 PetscCall(PetscMalloc1(Annz+1,&Ajmap1_new)); 6509 PetscCall(PetscMalloc1(Bnnz+1,&Bjmap1_new)); 6510 6511 PetscCall(ExpandJmap_Internal(Annz1,Annz,Aimap1,Ajmap1,Ajmap1_new)); 6512 
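  /* Same expansion for the off-diagonal block, so Bjmap1_new is indexed by the unique nonzeros of B */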
PetscCall(ExpandJmap_Internal(Bnnz1,Bnnz,Bimap1,Bjmap1,Bjmap1_new)); 6513 6514 PetscCall(PetscFree(Aimap1)); 6515 PetscCall(PetscFree(Ajmap1)); 6516 PetscCall(PetscFree(Bimap1)); 6517 PetscCall(PetscFree(Bjmap1)); 6518 PetscCall(PetscFree3(rowBegin1,rowMid1,rowEnd1)); 6519 PetscCall(PetscFree3(rowBegin2,rowMid2,rowEnd2)); 6520 PetscCall(PetscFree3(i1,j1,perm1)); 6521 PetscCall(PetscFree3(i2,j2,perm2)); 6522 6523 Ajmap1 = Ajmap1_new; 6524 Bjmap1 = Bjmap1_new; 6525 6526 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6527 if (Annz < Annz1 + Annz2) { 6528 PetscInt *Aj_new; 6529 PetscCall(PetscMalloc1(Annz,&Aj_new)); 6530 PetscCall(PetscArraycpy(Aj_new,Aj,Annz)); 6531 PetscCall(PetscFree(Aj)); 6532 Aj = Aj_new; 6533 } 6534 6535 if (Bnnz < Bnnz1 + Bnnz2) { 6536 PetscInt *Bj_new; 6537 PetscCall(PetscMalloc1(Bnnz,&Bj_new)); 6538 PetscCall(PetscArraycpy(Bj_new,Bj,Bnnz)); 6539 PetscCall(PetscFree(Bj)); 6540 Bj = Bj_new; 6541 } 6542 6543 /* --------------------------------------------------------------------------------*/ 6544 /* Create new submatrices for on-process and off-process coupling */ 6545 /* --------------------------------------------------------------------------------*/ 6546 PetscScalar *Aa,*Ba; 6547 MatType rtype; 6548 Mat_SeqAIJ *a,*b; 6549 PetscCall(PetscCalloc1(Annz,&Aa)); /* Zero matrix on device */ 6550 PetscCall(PetscCalloc1(Bnnz,&Ba)); 6551 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6552 if (cstart) {for (k=0; k<Annz; k++) Aj[k] -= cstart;} 6553 PetscCall(MatDestroy(&mpiaij->A)); 6554 PetscCall(MatDestroy(&mpiaij->B)); 6555 PetscCall(MatGetRootType_Private(mat,&rtype)); 6556 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,Ai,Aj,Aa,&mpiaij->A)); 6557 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,mat->cmap->N,Bi,Bj,Ba,&mpiaij->B)); 6558 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6559 6560 a = (Mat_SeqAIJ*)mpiaij->A->data; 6561 b = (Mat_SeqAIJ*)mpiaij->B->data; 6562 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6563 a->free_a = b->free_a = PETSC_TRUE; 6564 a->free_ij = b->free_ij = PETSC_TRUE; 6565 6566 /* conversion must happen AFTER multiply setup */ 6567 PetscCall(MatConvert(mpiaij->A,rtype,MAT_INPLACE_MATRIX,&mpiaij->A)); 6568 PetscCall(MatConvert(mpiaij->B,rtype,MAT_INPLACE_MATRIX,&mpiaij->B)); 6569 PetscCall(VecDestroy(&mpiaij->lvec)); 6570 PetscCall(MatCreateVecs(mpiaij->B,&mpiaij->lvec,NULL)); 6571 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->lvec)); 6572 6573 mpiaij->coo_n = coo_n; 6574 mpiaij->coo_sf = sf2; 6575 mpiaij->sendlen = nleaves; 6576 mpiaij->recvlen = nroots; 6577 6578 mpiaij->Annz = Annz; 6579 mpiaij->Bnnz = Bnnz; 6580 6581 mpiaij->Annz2 = Annz2; 6582 mpiaij->Bnnz2 = Bnnz2; 6583 6584 mpiaij->Atot1 = Atot1; 6585 mpiaij->Atot2 = Atot2; 6586 mpiaij->Btot1 = Btot1; 6587 mpiaij->Btot2 = Btot2; 6588 6589 mpiaij->Ajmap1 = Ajmap1; 6590 mpiaij->Aperm1 = Aperm1; 6591 6592 mpiaij->Bjmap1 = Bjmap1; 6593 mpiaij->Bperm1 = Bperm1; 6594 6595 mpiaij->Aimap2 = Aimap2; 6596 mpiaij->Ajmap2 = Ajmap2; 6597 mpiaij->Aperm2 = Aperm2; 6598 6599 mpiaij->Bimap2 = Bimap2; 6600 mpiaij->Bjmap2 = Bjmap2; 6601 mpiaij->Bperm2 = Bperm2; 6602 6603 mpiaij->Cperm1 = Cperm1; 6604 6605 /* Allocate in preallocation. 
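     sendbuf/recvbuf hold the scalar values exchanged among ranks on every MatSetValuesCOO() call.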
If not used, it has zero cost on host */
6606   PetscCall(PetscMalloc2(mpiaij->sendlen,&mpiaij->sendbuf,mpiaij->recvlen,&mpiaij->recvbuf));
6607   PetscFunctionReturn(0);
6608 }
6609
6610 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat,const PetscScalar v[],InsertMode imode)
6611 {
6612   Mat_MPIAIJ       *mpiaij = (Mat_MPIAIJ*)mat->data;
6613   Mat              A = mpiaij->A,B = mpiaij->B;
6614   PetscCount       Annz = mpiaij->Annz,Annz2 = mpiaij->Annz2,Bnnz = mpiaij->Bnnz,Bnnz2 = mpiaij->Bnnz2;
6615   PetscScalar      *Aa,*Ba;
6616   PetscScalar      *sendbuf = mpiaij->sendbuf;
6617   PetscScalar      *recvbuf = mpiaij->recvbuf;
6618   const PetscCount *Ajmap1 = mpiaij->Ajmap1,*Ajmap2 = mpiaij->Ajmap2,*Aimap2 = mpiaij->Aimap2;
6619   const PetscCount *Bjmap1 = mpiaij->Bjmap1,*Bjmap2 = mpiaij->Bjmap2,*Bimap2 = mpiaij->Bimap2;
6620   const PetscCount *Aperm1 = mpiaij->Aperm1,*Aperm2 = mpiaij->Aperm2,*Bperm1 = mpiaij->Bperm1,*Bperm2 = mpiaij->Bperm2;
6621   const PetscCount *Cperm1 = mpiaij->Cperm1;
6622
6623   PetscFunctionBegin;
6624   PetscCall(MatSeqAIJGetArray(A,&Aa)); /* Might read and write matrix values */
6625   PetscCall(MatSeqAIJGetArray(B,&Ba));
6626
6627   /* Pack entries to be sent to remote */
6628   for (PetscCount i=0; i<mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6629
6630   /* Send remote entries to their owner and overlap the communication with local computation */
6631   PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf,MPIU_SCALAR,PETSC_MEMTYPE_HOST,sendbuf,PETSC_MEMTYPE_HOST,recvbuf,MPI_REPLACE));
6632   /* Add local entries to A and B */
6633   for (PetscCount i=0; i<Annz; i++) { /* All nonzeros in A are either zeroed or added with a value (i.e., initialized) */
6634     PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */
6635     for (PetscCount k=Ajmap1[i]; k<Ajmap1[i+1]; k++) sum += v[Aperm1[k]];
6636     Aa[i] = (imode == INSERT_VALUES? 0.0 : Aa[i]) + sum;
6637   }
6638   for (PetscCount i=0; i<Bnnz; i++) {
6639     PetscScalar sum = 0.0;
6640     for (PetscCount k=Bjmap1[i]; k<Bjmap1[i+1]; k++) sum += v[Bperm1[k]];
6641     Ba[i] = (imode == INSERT_VALUES? 0.0 : Ba[i]) + sum;
6642   }
6643   PetscCall(PetscSFReduceEnd(mpiaij->coo_sf,MPIU_SCALAR,sendbuf,recvbuf,MPI_REPLACE));
6644
6645   /* Add received remote entries to A and B */
6646   for (PetscCount i=0; i<Annz2; i++) {
6647     for (PetscCount k=Ajmap2[i]; k<Ajmap2[i+1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6648   }
6649   for (PetscCount i=0; i<Bnnz2; i++) {
6650     for (PetscCount k=Bjmap2[i]; k<Bjmap2[i+1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6651   }
6652   PetscCall(MatSeqAIJRestoreArray(A,&Aa));
6653   PetscCall(MatSeqAIJRestoreArray(B,&Ba));
6654   PetscFunctionReturn(0);
6655 }
6656
6657 /* ----------------------------------------------------------------*/
6658
6659 /*MC
6660    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6661
6662    Options Database Keys:
6663 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6664
6665    Level: beginner
6666
6667    Notes:
6668     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
6669     in this case the values associated with the rows and columns one passes in are set to zero
6670     in the matrix
6671
6672     MatSetOption(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
6673     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
6674
6675 .seealso: `MatCreateAIJ()`
6676 M*/
6677
6678 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6679 {
6680   Mat_MPIAIJ  *b;
6681   PetscMPIInt size;
6682
6683   PetscFunctionBegin;
6684   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));
6685
6686   PetscCall(PetscNewLog(B,&b));
6687   B->data       = (void*)b;
6688   PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)));
6689   B->assembled  = PETSC_FALSE;
6690   B->insertmode = NOT_SET_VALUES;
6691   b->size       = size;
6692
6693   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank));
6694
6695   /* build cache for off array entries formed */
6696   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash));
6697
6698   b->donotstash  = PETSC_FALSE;
6699   b->colmap      = NULL;
6700   b->garray      = NULL;
6701   b->roworiented = PETSC_TRUE;
6702
6703   /* stuff used for matrix vector multiply */
6704   b->lvec  = NULL;
6705   b->Mvctx = NULL;
6706
6707   /* stuff for MatGetRow() */
6708   b->rowindices   = NULL;
6709   b->rowvalues    = NULL;
6710   b->getrowactive = PETSC_FALSE;
6711
6712   /* flexible pointer used in CUSPARSE classes */
6713   b->spptr = NULL;
6714
6715   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
6716   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ));
6717   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ));
6718   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ));
6719   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ));
6720   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ));
6721   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ));
6722   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ));
6723   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM));
6724   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL));
6725 #if defined(PETSC_HAVE_CUDA)
6726   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE));
6727 #endif
6728 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6729   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos));
6730 #endif
6731 #if defined(PETSC_HAVE_MKL_SPARSE)
6732   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL));
6733 #endif
6734   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL));
6735   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ));
6736   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ));
6737   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense));
6738 #if defined(PETSC_HAVE_ELEMENTAL)
6739
PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental)); 6740 #endif 6741 #if defined(PETSC_HAVE_SCALAPACK) 6742 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK)); 6743 #endif 6744 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS)); 6745 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL)); 6746 #if defined(PETSC_HAVE_HYPRE) 6747 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE)); 6748 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6749 #endif 6750 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ)); 6751 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ)); 6752 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetPreallocationCOO_C",MatSetPreallocationCOO_MPIAIJ)); 6753 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetValuesCOO_C",MatSetValuesCOO_MPIAIJ)); 6754 PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ)); 6755 PetscFunctionReturn(0); 6756 } 6757 6758 /*@C 6759 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 6760 and "off-diagonal" part of the matrix in CSR format. 6761 6762 Collective 6763 6764 Input Parameters: 6765 + comm - MPI communicator 6766 . m - number of local rows (Cannot be PETSC_DECIDE) 6767 . n - This value should be the same as the local size used in creating the 6768 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 6769 calculated if N is given) For square matrices n is almost always m. 6770 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 6771 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 6772 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6773 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6774 . a - matrix values 6775 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6776 . oj - column indices, which must be global, representing global columns in the MPIAIJ matrix 6777 - oa - matrix values 6778 6779 Output Parameter: 6780 . mat - the matrix 6781 6782 Level: advanced 6783 6784 Notes: 6785 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6786 must free the arrays once the matrix has been destroyed and not before. 6787 6788 The i and j indices are 0 based 6789 6790 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6791 6792 This sets local rows and cannot be used to set off-processor values. 6793 6794 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6795 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6796 not easily support in-place reassembly. 
It is recommended to use MatSetValues() (or a variant thereof) because 6797 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6798 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 6799 communication if it is known that only local entries will be set. 6800 6801 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6802 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6803 @*/ 6804 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 6805 { 6806 Mat_MPIAIJ *maij; 6807 6808 PetscFunctionBegin; 6809 PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6810 PetscCheck(i[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 6811 PetscCheck(oi[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 6812 PetscCall(MatCreate(comm,mat)); 6813 PetscCall(MatSetSizes(*mat,m,n,M,N)); 6814 PetscCall(MatSetType(*mat,MATMPIAIJ)); 6815 maij = (Mat_MPIAIJ*) (*mat)->data; 6816 6817 (*mat)->preallocated = PETSC_TRUE; 6818 6819 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 6820 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 6821 6822 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A)); 6823 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B)); 6824 6825 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 6826 PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY)); 6827 PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY)); 6828 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE)); 6829 PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 6830 PetscFunctionReturn(0); 6831 } 6832 6833 typedef struct { 6834 Mat *mp; /* intermediate products */ 6835 PetscBool *mptmp; /* is the intermediate product temporary ? */ 6836 PetscInt cp; /* number of intermediate products */ 6837 6838 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6839 PetscInt *startsj_s,*startsj_r; 6840 PetscScalar *bufa; 6841 Mat P_oth; 6842 6843 /* may take advantage of merging product->B */ 6844 Mat Bloc; /* B-local by merging diag and off-diag */ 6845 6846 /* cusparse does not have support to split between symbolic and numeric phases. 6847 When api_user is true, we don't need to update the numerical values 6848 of the temporary storage */ 6849 PetscBool reusesym; 6850 6851 /* support for COO values insertion */ 6852 PetscScalar *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 6853 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 6854 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 6855 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. 
AtB or PtAP) */ 6856 PetscSF sf; /* used for non-local values insertion and memory malloc */ 6857 PetscMemType mtype; 6858 6859 /* customization */ 6860 PetscBool abmerge; 6861 PetscBool P_oth_bind; 6862 } MatMatMPIAIJBACKEND; 6863 6864 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 6865 { 6866 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data; 6867 PetscInt i; 6868 6869 PetscFunctionBegin; 6870 PetscCall(PetscFree2(mmdata->startsj_s,mmdata->startsj_r)); 6871 PetscCall(PetscFree(mmdata->bufa)); 6872 PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v)); 6873 PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w)); 6874 PetscCall(MatDestroy(&mmdata->P_oth)); 6875 PetscCall(MatDestroy(&mmdata->Bloc)); 6876 PetscCall(PetscSFDestroy(&mmdata->sf)); 6877 for (i = 0; i < mmdata->cp; i++) { 6878 PetscCall(MatDestroy(&mmdata->mp[i])); 6879 } 6880 PetscCall(PetscFree2(mmdata->mp,mmdata->mptmp)); 6881 PetscCall(PetscFree(mmdata->own[0])); 6882 PetscCall(PetscFree(mmdata->own)); 6883 PetscCall(PetscFree(mmdata->off[0])); 6884 PetscCall(PetscFree(mmdata->off)); 6885 PetscCall(PetscFree(mmdata)); 6886 PetscFunctionReturn(0); 6887 } 6888 6889 /* Copy selected n entries with indices in idx[] of A to v[]. 6890 If idx is NULL, copy the whole data array of A to v[] 6891 */ 6892 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 6893 { 6894 PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]); 6895 6896 PetscFunctionBegin; 6897 PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f)); 6898 if (f) { 6899 PetscCall((*f)(A,n,idx,v)); 6900 } else { 6901 const PetscScalar *vv; 6902 6903 PetscCall(MatSeqAIJGetArrayRead(A,&vv)); 6904 if (n && idx) { 6905 PetscScalar *w = v; 6906 const PetscInt *oi = idx; 6907 PetscInt j; 6908 6909 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 6910 } else { 6911 PetscCall(PetscArraycpy(v,vv,n)); 6912 } 6913 PetscCall(MatSeqAIJRestoreArrayRead(A,&vv)); 6914 } 6915 PetscFunctionReturn(0); 6916 } 6917 6918 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 6919 { 6920 MatMatMPIAIJBACKEND *mmdata; 6921 PetscInt i,n_d,n_o; 6922 6923 PetscFunctionBegin; 6924 MatCheckProduct(C,1); 6925 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 6926 mmdata = (MatMatMPIAIJBACKEND*)C->product->data; 6927 if (!mmdata->reusesym) { /* update temporary matrices */ 6928 if (mmdata->P_oth) { 6929 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth)); 6930 } 6931 if (mmdata->Bloc) { 6932 PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc)); 6933 } 6934 } 6935 mmdata->reusesym = PETSC_FALSE; 6936 6937 for (i = 0; i < mmdata->cp; i++) { 6938 PetscCheck(mmdata->mp[i]->ops->productnumeric,PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]); 6939 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 6940 } 6941 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 6942 PetscInt noff = mmdata->off[i+1] - mmdata->off[i]; 6943 6944 if (mmdata->mptmp[i]) continue; 6945 if (noff) { 6946 PetscInt nown = mmdata->own[i+1] - mmdata->own[i]; 6947 6948 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o)); 6949 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d)); 
6950 n_o += noff; 6951 n_d += nown; 6952 } else { 6953 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data; 6954 6955 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d)); 6956 n_d += mm->nz; 6957 } 6958 } 6959 if (mmdata->hasoffproc) { /* offprocess insertion */ 6960 PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d)); 6961 PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d)); 6962 } 6963 PetscCall(MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES)); 6964 PetscFunctionReturn(0); 6965 } 6966 6967 /* Support for Pt * A, A * P, or Pt * A * P */ 6968 #define MAX_NUMBER_INTERMEDIATE 4 6969 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 6970 { 6971 Mat_Product *product = C->product; 6972 Mat A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 6973 Mat_MPIAIJ *a,*p; 6974 MatMatMPIAIJBACKEND *mmdata; 6975 ISLocalToGlobalMapping P_oth_l2g = NULL; 6976 IS glob = NULL; 6977 const char *prefix; 6978 char pprefix[256]; 6979 const PetscInt *globidx,*P_oth_idx; 6980 PetscInt i,j,cp,m,n,M,N,*coo_i,*coo_j; 6981 PetscCount ncoo,ncoo_d,ncoo_o,ncoo_oown; 6982 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */ 6983 /* type-0: consecutive, start from 0; type-1: consecutive with */ 6984 /* a base offset; type-2: sparse with a local to global map table */ 6985 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 6986 6987 MatProductType ptype; 6988 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk; 6989 PetscMPIInt size; 6990 6991 PetscFunctionBegin; 6992 MatCheckProduct(C,1); 6993 PetscCheck(!product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 6994 ptype = product->type; 6995 if (product->A->symmetric && ptype == MATPRODUCT_AtB) { 6996 ptype = MATPRODUCT_AB; 6997 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 6998 } 6999 switch (ptype) { 7000 case MATPRODUCT_AB: 7001 A = product->A; 7002 P = product->B; 7003 m = A->rmap->n; 7004 n = P->cmap->n; 7005 M = A->rmap->N; 7006 N = P->cmap->N; 7007 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7008 break; 7009 case MATPRODUCT_AtB: 7010 P = product->A; 7011 A = product->B; 7012 m = P->cmap->n; 7013 n = A->cmap->n; 7014 M = P->cmap->N; 7015 N = A->cmap->N; 7016 hasoffproc = PETSC_TRUE; 7017 break; 7018 case MATPRODUCT_PtAP: 7019 A = product->A; 7020 P = product->B; 7021 m = P->cmap->n; 7022 n = P->cmap->n; 7023 M = P->cmap->N; 7024 N = P->cmap->N; 7025 hasoffproc = PETSC_TRUE; 7026 break; 7027 default: 7028 SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 7029 } 7030 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C),&size)); 7031 if (size == 1) hasoffproc = PETSC_FALSE; 7032 7033 /* defaults */ 7034 for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) { 7035 mp[i] = NULL; 7036 mptmp[i] = PETSC_FALSE; 7037 rmapt[i] = -1; 7038 cmapt[i] = -1; 7039 rmapa[i] = NULL; 7040 cmapa[i] = NULL; 7041 } 7042 7043 /* customization */ 7044 PetscCall(PetscNew(&mmdata)); 7045 mmdata->reusesym = product->api_user; 7046 if (ptype == MATPRODUCT_AB) { 7047 if (product->api_user) { 7048 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat"); 7049 
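      /* Both customization flags default to PETSC_FALSE because mmdata was just obtained from PetscNew(), which zeroes it */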
PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL)); 7050 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 7051 PetscOptionsEnd(); 7052 } else { 7053 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat"); 7054 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL)); 7055 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 7056 PetscOptionsEnd(); 7057 } 7058 } else if (ptype == MATPRODUCT_PtAP) { 7059 if (product->api_user) { 7060 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat"); 7061 PetscCall(PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 7062 PetscOptionsEnd(); 7063 } else { 7064 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat"); 7065 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 7066 PetscOptionsEnd(); 7067 } 7068 } 7069 a = (Mat_MPIAIJ*)A->data; 7070 p = (Mat_MPIAIJ*)P->data; 7071 PetscCall(MatSetSizes(C,m,n,M,N)); 7072 PetscCall(PetscLayoutSetUp(C->rmap)); 7073 PetscCall(PetscLayoutSetUp(C->cmap)); 7074 PetscCall(MatSetType(C,((PetscObject)A)->type_name)); 7075 PetscCall(MatGetOptionsPrefix(C,&prefix)); 7076 7077 cp = 0; 7078 switch (ptype) { 7079 case MATPRODUCT_AB: /* A * P */ 7080 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth)); 7081 7082 /* A_diag * P_local (merged or not) */ 7083 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7084 /* P is product->B */ 7085 PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc)); 7086 PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp])); 7087 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7088 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7089 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7090 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7091 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7092 mp[cp]->product->api_user = product->api_user; 7093 PetscCall(MatProductSetFromOptions(mp[cp])); 7094 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7095 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7096 PetscCall(ISGetIndices(glob,&globidx)); 7097 rmapt[cp] = 1; 7098 cmapt[cp] = 2; 7099 cmapa[cp] = globidx; 7100 mptmp[cp] = PETSC_FALSE; 7101 cp++; 7102 } else { /* A_diag * P_diag and A_diag * P_off */ 7103 PetscCall(MatProductCreate(a->A,p->A,NULL,&mp[cp])); 7104 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7105 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7106 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7107 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7108 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7109 mp[cp]->product->api_user = product->api_user; 7110 
PetscCall(MatProductSetFromOptions(mp[cp])); 7111 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7112 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7113 rmapt[cp] = 1; 7114 cmapt[cp] = 1; 7115 mptmp[cp] = PETSC_FALSE; 7116 cp++; 7117 PetscCall(MatProductCreate(a->A,p->B,NULL,&mp[cp])); 7118 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7119 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7120 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7121 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7122 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7123 mp[cp]->product->api_user = product->api_user; 7124 PetscCall(MatProductSetFromOptions(mp[cp])); 7125 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7126 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7127 rmapt[cp] = 1; 7128 cmapt[cp] = 2; 7129 cmapa[cp] = p->garray; 7130 mptmp[cp] = PETSC_FALSE; 7131 cp++; 7132 } 7133 7134 /* A_off * P_other */ 7135 if (mmdata->P_oth) { 7136 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); /* make P_oth use local col ids */ 7137 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx)); 7138 PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name)); 7139 PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind)); 7140 PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp])); 7141 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7142 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7143 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7144 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7145 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7146 mp[cp]->product->api_user = product->api_user; 7147 PetscCall(MatProductSetFromOptions(mp[cp])); 7148 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7149 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7150 rmapt[cp] = 1; 7151 cmapt[cp] = 2; 7152 cmapa[cp] = P_oth_idx; 7153 mptmp[cp] = PETSC_FALSE; 7154 cp++; 7155 } 7156 break; 7157 7158 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7159 /* A is product->B */ 7160 PetscCall(MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc)); 7161 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7162 PetscCall(MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp])); 7163 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7164 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7165 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7166 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7167 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7168 mp[cp]->product->api_user = product->api_user; 7169 PetscCall(MatProductSetFromOptions(mp[cp])); 7170 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7171 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7172 PetscCall(ISGetIndices(glob,&globidx)); 7173 rmapt[cp] = 2; 7174 rmapa[cp] = globidx; 7175 cmapt[cp] = 2; 7176 cmapa[cp] = globidx; 7177 mptmp[cp] = PETSC_FALSE; 7178 cp++; 
7179 } else { 7180 PetscCall(MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp])); 7181 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7182 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7183 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7184 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7185 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7186 mp[cp]->product->api_user = product->api_user; 7187 PetscCall(MatProductSetFromOptions(mp[cp])); 7188 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7189 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7190 PetscCall(ISGetIndices(glob,&globidx)); 7191 rmapt[cp] = 1; 7192 cmapt[cp] = 2; 7193 cmapa[cp] = globidx; 7194 mptmp[cp] = PETSC_FALSE; 7195 cp++; 7196 PetscCall(MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp])); 7197 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7198 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7199 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7200 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7201 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7202 mp[cp]->product->api_user = product->api_user; 7203 PetscCall(MatProductSetFromOptions(mp[cp])); 7204 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7205 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7206 rmapt[cp] = 2; 7207 rmapa[cp] = p->garray; 7208 cmapt[cp] = 2; 7209 cmapa[cp] = globidx; 7210 mptmp[cp] = PETSC_FALSE; 7211 cp++; 7212 } 7213 break; 7214 case MATPRODUCT_PtAP: 7215 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth)); 7216 /* P is product->B */ 7217 PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc)); 7218 PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp])); 7219 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_PtAP)); 7220 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7221 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7222 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7223 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7224 mp[cp]->product->api_user = product->api_user; 7225 PetscCall(MatProductSetFromOptions(mp[cp])); 7226 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7227 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7228 PetscCall(ISGetIndices(glob,&globidx)); 7229 rmapt[cp] = 2; 7230 rmapa[cp] = globidx; 7231 cmapt[cp] = 2; 7232 cmapa[cp] = globidx; 7233 mptmp[cp] = PETSC_FALSE; 7234 cp++; 7235 if (mmdata->P_oth) { 7236 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); 7237 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx)); 7238 PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name)); 7239 PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind)); 7240 PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp])); 7241 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7242 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7243 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7244 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7245 
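/* each internal product receives the options prefix "<matrix prefix>backend_p<n>_" (set just above and
   appended just below), so every intermediate product can be tuned independently from the options database */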
PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7246         mp[cp]->product->api_user = product->api_user;
7247         PetscCall(MatProductSetFromOptions(mp[cp]));
7248         PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7249         PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7250         mptmp[cp] = PETSC_TRUE;
7251         cp++;
7252         PetscCall(MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]));
7253         PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7254         PetscCall(MatProductSetFill(mp[cp],product->fill));
7255         PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7256         PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7257         PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7258         mp[cp]->product->api_user = product->api_user;
7259         PetscCall(MatProductSetFromOptions(mp[cp]));
7260         PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7261         PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7262         rmapt[cp] = 2;
7263         rmapa[cp] = globidx;
7264         cmapt[cp] = 2;
7265         cmapa[cp] = P_oth_idx;
7266         mptmp[cp] = PETSC_FALSE;
7267         cp++;
7268       }
7269       break;
7270     default:
7271       SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
7272   }
7273   /* sanity check */
7274   if (size > 1) for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %" PetscInt_FMT,i);
7275
7276   PetscCall(PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp));
7277   for (i = 0; i < cp; i++) {
7278     mmdata->mp[i]    = mp[i];
7279     mmdata->mptmp[i] = mptmp[i];
7280   }
7281   mmdata->cp             = cp;
7282   C->product->data       = mmdata;
7283   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7284   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7285
7286   /* memory type */
7287   mmdata->mtype = PETSC_MEMTYPE_HOST;
7288   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,""));
7289   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,""));
7290   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7291   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7292
7293   /* prepare coo coordinates for values insertion */
7294
7295   /* count the total nonzeros of the intermediate seqaij Mats
7296     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7297     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted into remote procs
7298     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7299   */
7300   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7301     Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
7302     if (mptmp[cp]) continue;
7303     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */
7304       const PetscInt *rmap = rmapa[cp];
7305       const PetscInt mr = mp[cp]->rmap->n;
7306       const PetscInt rs = C->rmap->rstart;
7307       const PetscInt re = C->rmap->rend;
7308       const PetscInt *ii = mm->i;
7309       for (i = 0; i < mr; i++) {
7310         const PetscInt gr = rmap[i];
7311         const PetscInt nz = ii[i+1] - ii[i];
7312         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7313         else ncoo_oown += nz; /* this row is local */
7314       }
7315     } else ncoo_d += mm->nz;
7316   }
7317
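  /* An illustrative example (not part of the original code): suppose two non-temporary products
     survive, mp[0] with a contiguous row map (rmapt[0] == 1) and 10 nonzeros, and mp[1] with a
     sparse row map (rmapt[1] == 2) whose rows place 6 nonzeros in rows owned by other processes
     and 4 in locally owned rows; the loop above then yields ncoo_d = 10, ncoo_o = 6, ncoo_oown = 4 */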
7318   /*
7319     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7320
7321     ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted into this proc by other procs.
7322
7323     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly for own[0].
7324
7325     off[p]: points to the segment for matrix mp[p], storing the locations of nonzeros that mp[p] will insert into other procs
7326     own[p]: points to the segment for matrix mp[p], storing the locations of nonzeros that mp[p] will insert locally
7327     so off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7328
7329     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7330     Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros this proc will receive.
7331   */
7332   PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->off)); /* +1 to make a csr-like data structure */
7333   PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->own));
7334
7335   /* gather (i,j) of nonzeros inserted by remote procs */
7336   if (hasoffproc) {
7337     PetscSF  msf;
7338     PetscInt ncoo2,*coo_i2,*coo_j2;
7339
7340     PetscCall(PetscMalloc1(ncoo_o,&mmdata->off[0]));
7341     PetscCall(PetscMalloc1(ncoo_oown,&mmdata->own[0]));
7342     PetscCall(PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j)); /* to collect (i,j) of entries to be sent to others */
7343
7344     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7345       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
7346       PetscInt   *idxoff = mmdata->off[cp];
7347       PetscInt   *idxown = mmdata->own[cp];
7348       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7349         const PetscInt *rmap = rmapa[cp];
7350         const PetscInt *cmap = cmapa[cp];
7351         const PetscInt *ii  = mm->i;
7352         PetscInt       *coi = coo_i + ncoo_o;
7353         PetscInt       *coj = coo_j + ncoo_o;
7354         const PetscInt mr = mp[cp]->rmap->n;
7355         const PetscInt rs = C->rmap->rstart;
7356         const PetscInt re = C->rmap->rend;
7357         const PetscInt cs = C->cmap->rstart;
7358         for (i = 0; i < mr; i++) {
7359           const PetscInt *jj = mm->j + ii[i];
7360           const PetscInt gr  = rmap[i];
7361           const PetscInt nz  = ii[i+1] - ii[i];
7362           if (gr < rs || gr >= re) { /* this is an offproc row */
7363             for (j = ii[i]; j < ii[i+1]; j++) {
7364               *coi++ = gr;
7365               *idxoff++ = j;
7366             }
7367             if (!cmapt[cp]) { /* already global */
7368               for (j = 0; j < nz; j++) *coj++ = jj[j];
7369             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7370               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7371             } else { /* offdiag */
7372               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7373             }
7374             ncoo_o += nz;
7375           } else { /* this is a local row */
7376             for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j;
7377           }
7378         }
7379       }
7380       mmdata->off[cp + 1] = idxoff;
7381       mmdata->own[cp + 1] = idxown;
7382     }
7383
7384     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf));
7385     PetscCall(PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i));
7386     PetscCall(PetscSFGetMultiSF(mmdata->sf,&msf));
7387     PetscCall(PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL));
7388     ncoo = ncoo_d + ncoo_oown + ncoo2;
7389     PetscCall(PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2));
7390     PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
7391     PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown));
7392     PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown));
7393
PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown)); 7394 PetscCall(PetscFree2(coo_i,coo_j)); 7395 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7396 PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w)); 7397 coo_i = coo_i2; 7398 coo_j = coo_j2; 7399 } else { /* no offproc values insertion */ 7400 ncoo = ncoo_d; 7401 PetscCall(PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j)); 7402 7403 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf)); 7404 PetscCall(PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER)); 7405 PetscCall(PetscSFSetUp(mmdata->sf)); 7406 } 7407 mmdata->hasoffproc = hasoffproc; 7408 7409 /* gather (i,j) of nonzeros inserted locally */ 7410 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7411 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 7412 PetscInt *coi = coo_i + ncoo_d; 7413 PetscInt *coj = coo_j + ncoo_d; 7414 const PetscInt *jj = mm->j; 7415 const PetscInt *ii = mm->i; 7416 const PetscInt *cmap = cmapa[cp]; 7417 const PetscInt *rmap = rmapa[cp]; 7418 const PetscInt mr = mp[cp]->rmap->n; 7419 const PetscInt rs = C->rmap->rstart; 7420 const PetscInt re = C->rmap->rend; 7421 const PetscInt cs = C->cmap->rstart; 7422 7423 if (mptmp[cp]) continue; 7424 if (rmapt[cp] == 1) { /* consecutive rows */ 7425 /* fill coo_i */ 7426 for (i = 0; i < mr; i++) { 7427 const PetscInt gr = i + rs; 7428 for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr; 7429 } 7430 /* fill coo_j */ 7431 if (!cmapt[cp]) { /* type-0, already global */ 7432 PetscCall(PetscArraycpy(coj,jj,mm->nz)); 7433 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7434 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7435 } else { /* type-2, local to global for sparse columns */ 7436 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7437 } 7438 ncoo_d += mm->nz; 7439 } else if (rmapt[cp] == 2) { /* sparse rows */ 7440 for (i = 0; i < mr; i++) { 7441 const PetscInt *jj = mm->j + ii[i]; 7442 const PetscInt gr = rmap[i]; 7443 const PetscInt nz = ii[i+1] - ii[i]; 7444 if (gr >= rs && gr < re) { /* local rows */ 7445 for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr; 7446 if (!cmapt[cp]) { /* type-0, already global */ 7447 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7448 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7449 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7450 } else { /* type-2, local to global for sparse columns */ 7451 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7452 } 7453 ncoo_d += nz; 7454 } 7455 } 7456 } 7457 } 7458 if (glob) { 7459 PetscCall(ISRestoreIndices(glob,&globidx)); 7460 } 7461 PetscCall(ISDestroy(&glob)); 7462 if (P_oth_l2g) { 7463 PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx)); 7464 } 7465 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7466 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7467 PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v)); 7468 7469 /* preallocate with COO data */ 7470 PetscCall(MatSetPreallocationCOO(C,ncoo,coo_i,coo_j)); 7471 PetscCall(PetscFree2(coo_i,coo_j)); 7472 PetscFunctionReturn(0); 7473 } 7474 7475 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7476 { 7477 Mat_Product *product = mat->product; 7478 #if defined(PETSC_HAVE_DEVICE) 7479 PetscBool match = PETSC_FALSE; 7480 PetscBool usecpu = PETSC_FALSE; 
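  /* usecpu is set from the options database below; when the user requests the CPU path the
     backend symbolic routine is not installed and we fall back to the MPIAIJ implementations */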
7481 #else 7482 PetscBool match = PETSC_TRUE; 7483 #endif 7484 7485 PetscFunctionBegin; 7486 MatCheckProduct(mat,1); 7487 #if defined(PETSC_HAVE_DEVICE) 7488 if (!product->A->boundtocpu && !product->B->boundtocpu) { 7489 PetscCall(PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match)); 7490 } 7491 if (match) { /* we can always fallback to the CPU if requested */ 7492 switch (product->type) { 7493 case MATPRODUCT_AB: 7494 if (product->api_user) { 7495 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat"); 7496 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL)); 7497 PetscOptionsEnd(); 7498 } else { 7499 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat"); 7500 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL)); 7501 PetscOptionsEnd(); 7502 } 7503 break; 7504 case MATPRODUCT_AtB: 7505 if (product->api_user) { 7506 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat"); 7507 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL)); 7508 PetscOptionsEnd(); 7509 } else { 7510 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat"); 7511 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL)); 7512 PetscOptionsEnd(); 7513 } 7514 break; 7515 case MATPRODUCT_PtAP: 7516 if (product->api_user) { 7517 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat"); 7518 PetscCall(PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL)); 7519 PetscOptionsEnd(); 7520 } else { 7521 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat"); 7522 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL)); 7523 PetscOptionsEnd(); 7524 } 7525 break; 7526 default: 7527 break; 7528 } 7529 match = (PetscBool)!usecpu; 7530 } 7531 #endif 7532 if (match) { 7533 switch (product->type) { 7534 case MATPRODUCT_AB: 7535 case MATPRODUCT_AtB: 7536 case MATPRODUCT_PtAP: 7537 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7538 break; 7539 default: 7540 break; 7541 } 7542 } 7543 /* fallback to MPIAIJ ops */ 7544 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7545 PetscFunctionReturn(0); 7546 } 7547 7548 /* 7549 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7550 7551 n - the number of block indices in cc[] 7552 cc - the block indices (must be large enough to contain the indices) 7553 */ 7554 static inline PetscErrorCode MatCollapseRow(Mat Amat,PetscInt row,PetscInt bs,PetscInt *n,PetscInt *cc) 7555 { 7556 PetscInt cnt = -1,nidx,j; 7557 const PetscInt *idx; 7558 7559 PetscFunctionBegin; 7560 PetscCall(MatGetRow(Amat,row,&nidx,&idx,NULL)); 7561 if (nidx) { 7562 cnt = 0; 7563 cc[cnt] = idx[0]/bs; 7564 for (j=1; j<nidx; j++) { 7565 if (cc[cnt] < idx[j]/bs) cc[++cnt] = idx[j]/bs; 7566 } 7567 } 7568 PetscCall(MatRestoreRow(Amat,row,&nidx,&idx,NULL)); 7569 *n = cnt+1; 7570 PetscFunctionReturn(0); 7571 } 7572 7573 /* 7574 Produces a set of block column indices of the matrix block row, one for 
each block represented in the original set of rows
7575
7576    ncollapsed - the number of block indices
7577    collapsed  - the block indices (must be large enough to contain the indices)
7578 */
7579 static inline PetscErrorCode MatCollapseRows(Mat Amat,PetscInt start,PetscInt bs,PetscInt *w0,PetscInt *w1,PetscInt *w2,PetscInt *ncollapsed,PetscInt **collapsed)
7580 {
7581   PetscInt i,nprev,*cprev = w0,ncur = 0,*ccur = w1,*merged = w2,*cprevtmp;
7582
7583   PetscFunctionBegin;
7584   PetscCall(MatCollapseRow(Amat,start,bs,&nprev,cprev));
7585   for (i=start+1; i<start+bs; i++) {
7586     PetscCall(MatCollapseRow(Amat,i,bs,&ncur,ccur));
7587     PetscCall(PetscMergeIntArray(nprev,cprev,ncur,ccur,&nprev,&merged));
7588     cprevtmp = cprev; cprev = merged; merged = cprevtmp;
7589   }
7590   *ncollapsed = nprev;
7591   if (collapsed) *collapsed = cprev;
7592   PetscFunctionReturn(0);
7593 }
7594
7595 /* -------------------------------------------------------------------------- */
7596 /*
7597    MatCreateGraph_Simple_AIJ - create a simple scalar matrix (graph) from a potentially blocked matrix
7598
7599    Input Parameters:
7600 +  Amat - matrix
7601 .  symmetrize - make the result symmetric
7602 -  scale - scale with diagonal
7603
7604    Output Parameter:
7605 .  a_Gmat - output scalar graph with entries >= 0
7606
7607 */
7608 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, Mat *a_Gmat)
7609 {
7610   PetscInt  Istart,Iend,Ii,jj,kk,ncols,nloc,NN,MM,bs;
7611   MPI_Comm  comm;
7612   Mat       Gmat;
7613   PetscBool ismpiaij,isseqaij;
7614   Mat       a, b, c;
7615   MatType   jtype;
7616
7617   PetscFunctionBegin;
7618   PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm));
7619   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7620   PetscCall(MatGetSize(Amat, &MM, &NN));
7621   PetscCall(MatGetBlockSize(Amat, &bs));
7622   nloc = (Iend-Istart)/bs;
7623
7624   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATSEQAIJ,&isseqaij));
7625   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATMPIAIJ,&ismpiaij));
7626   PetscCheck(isseqaij || ismpiaij,comm,PETSC_ERR_USER,"Require (MPI)AIJ matrix type");
7627
7628   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7629   /* A solution would be to provide a new API, MatAIJGetCollapsedAIJ, for which each class could provide a fast
7630      implementation */
7631   if (bs > 1) {
7632     PetscCall(MatGetType(Amat,&jtype));
7633     PetscCall(MatCreate(comm, &Gmat));
7634     PetscCall(MatSetType(Gmat, jtype));
7635     PetscCall(MatSetSizes(Gmat,nloc,nloc,PETSC_DETERMINE,PETSC_DETERMINE));
7636     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7637     if (isseqaij || ((Mat_MPIAIJ*)Amat->data)->garray) {
7638       PetscInt  *d_nnz, *o_nnz;
7639       MatScalar *aa,val,AA[4096];
7640       PetscInt  *aj,*ai,AJ[4096],nc;
7641       if (isseqaij) { a = Amat; b = NULL; }
7642       else {
7643         Mat_MPIAIJ *d = (Mat_MPIAIJ*)Amat->data;
7644         a = d->A; b = d->B;
7645       }
7646       PetscCall(PetscInfo(Amat,"New bs>1 Graph. nloc=%" PetscInt_FMT "\n",nloc));
7647       PetscCall(PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz));
7648       for (c=a, kk=0 ; c && kk<2 ; c=b, kk++) {
7649         PetscInt *nnz = (c==a) ?
d_nnz : o_nnz, nmax=0; 7650 const PetscInt *cols; 7651 for (PetscInt brow=0,jj,ok=1,j0; brow < nloc*bs; brow += bs) { // block rows 7652 PetscCall(MatGetRow(c,brow,&jj,&cols,NULL)); 7653 nnz[brow/bs] = jj/bs; 7654 if (jj%bs) ok = 0; 7655 if (cols) j0 = cols[0]; 7656 else j0 = -1; 7657 PetscCall(MatRestoreRow(c,brow,&jj,&cols,NULL)); 7658 if (nnz[brow/bs]>nmax) nmax = nnz[brow/bs]; 7659 for (PetscInt ii=1; ii < bs && nnz[brow/bs] ; ii++) { // check for non-dense blocks 7660 PetscCall(MatGetRow(c,brow+ii,&jj,&cols,NULL)); 7661 if (jj%bs) ok = 0; 7662 if ((cols && j0 != cols[0]) || (!cols && j0 != -1)) ok = 0; 7663 if (nnz[brow/bs] != jj/bs) ok = 0; 7664 PetscCall(MatRestoreRow(c,brow+ii,&jj,&cols,NULL)); 7665 } 7666 if (!ok) { 7667 PetscCall(PetscFree2(d_nnz,o_nnz)); 7668 goto old_bs; 7669 } 7670 } 7671 PetscCheck(nmax<4096,PETSC_COMM_SELF,PETSC_ERR_USER,"Buffer %" PetscInt_FMT " too small 4096.",nmax); 7672 } 7673 PetscCall(MatSeqAIJSetPreallocation(Gmat,0,d_nnz)); 7674 PetscCall(MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz)); 7675 PetscCall(PetscFree2(d_nnz,o_nnz)); 7676 // diag 7677 for (PetscInt brow=0,n,grow; brow < nloc*bs; brow += bs) { // block rows 7678 Mat_SeqAIJ *aseq = (Mat_SeqAIJ*)a->data; 7679 ai = aseq->i; 7680 n = ai[brow+1] - ai[brow]; 7681 aj = aseq->j + ai[brow]; 7682 for (int k=0; k<n; k += bs) { // block columns 7683 AJ[k/bs] = aj[k]/bs + Istart/bs; // diag starts at (Istart,Istart) 7684 val = 0; 7685 for (int ii=0; ii<bs; ii++) { // rows in block 7686 aa = aseq->a + ai[brow+ii] + k; 7687 for (int jj=0; jj<bs; jj++) { // columns in block 7688 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7689 } 7690 } 7691 AA[k/bs] = val; 7692 } 7693 grow = Istart/bs + brow/bs; 7694 PetscCall(MatSetValues(Gmat,1,&grow,n/bs,AJ,AA,INSERT_VALUES)); 7695 } 7696 // off-diag 7697 if (ismpiaij) { 7698 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)Amat->data; 7699 const PetscScalar *vals; 7700 const PetscInt *cols, *garray = aij->garray; 7701 PetscCheck(garray,PETSC_COMM_SELF,PETSC_ERR_USER,"No garray ?"); 7702 for (PetscInt brow=0,grow; brow < nloc*bs; brow += bs) { // block rows 7703 PetscCall(MatGetRow(b,brow,&ncols,&cols,NULL)); 7704 for (int k=0,cidx=0 ; k < ncols ; k += bs, cidx++) { 7705 AA[k/bs] = 0; 7706 AJ[cidx] = garray[cols[k]]/bs; 7707 } 7708 nc = ncols/bs; 7709 PetscCall(MatRestoreRow(b,brow,&ncols,&cols,NULL)); 7710 for (int ii=0; ii<bs; ii++) { // rows in block 7711 PetscCall(MatGetRow(b,brow+ii,&ncols,&cols,&vals)); 7712 for (int k=0; k<ncols; k += bs) { 7713 for (int jj=0; jj<bs; jj++) { // cols in block 7714 AA[k/bs] += PetscAbs(PetscRealPart(vals[k+jj])); 7715 } 7716 } 7717 PetscCall(MatRestoreRow(b,brow+ii,&ncols,&cols,&vals)); 7718 } 7719 grow = Istart/bs + brow/bs; 7720 PetscCall(MatSetValues(Gmat,1,&grow,nc,AJ,AA,INSERT_VALUES)); 7721 } 7722 } 7723 PetscCall(MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY)); 7724 PetscCall(MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY)); 7725 } else { 7726 const PetscScalar *vals; 7727 const PetscInt *idx; 7728 PetscInt *d_nnz, *o_nnz,*w0,*w1,*w2; 7729 old_bs: 7730 /* 7731 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7732 */ 7733 PetscCall(PetscInfo(Amat,"OLD bs>1 CreateGraph\n")); 7734 PetscCall(PetscMalloc2(nloc, &d_nnz,isseqaij ? 
0 : nloc, &o_nnz));
7735       if (isseqaij) {
7736         PetscInt max_d_nnz;
7737         /*
7738           Determine exact preallocation count for (sequential) scalar matrix
7739         */
7740         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat,&max_d_nnz));
7741         max_d_nnz = PetscMin(nloc,bs*max_d_nnz);
7742         PetscCall(PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2));
7743         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) {
7744           PetscCall(MatCollapseRows(Amat,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL));
7745         }
7746         PetscCall(PetscFree3(w0,w1,w2));
7747       } else if (ismpiaij) {
7748         Mat            Daij,Oaij;
7749         const PetscInt *garray;
7750         PetscInt       max_d_nnz;
7751         PetscCall(MatMPIAIJGetSeqAIJ(Amat,&Daij,&Oaij,&garray));
7752         /*
7753           Determine exact preallocation count for diagonal block portion of scalar matrix
7754         */
7755         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij,&max_d_nnz));
7756         max_d_nnz = PetscMin(nloc,bs*max_d_nnz);
7757         PetscCall(PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2));
7758         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7759           PetscCall(MatCollapseRows(Daij,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL));
7760         }
7761         PetscCall(PetscFree3(w0,w1,w2));
7762         /*
7763           Overestimate (usually grossly so) the preallocation count for the off-diagonal portion of the scalar matrix
7764         */
7765         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7766           o_nnz[jj] = 0;
7767           for (kk=0; kk<bs; kk++) { /* rows that get collapsed to a single row */
7768             PetscCall(MatGetRow(Oaij,Ii+kk,&ncols,NULL,NULL));
7769             o_nnz[jj] += ncols;
7770             PetscCall(MatRestoreRow(Oaij,Ii+kk,&ncols,NULL,NULL));
7771           }
7772           if (o_nnz[jj] > (NN/bs-nloc)) o_nnz[jj] = NN/bs-nloc;
7773         }
7774       } else SETERRQ(comm,PETSC_ERR_USER,"Require AIJ matrix type");
7775       /* get scalar copy (norms) of matrix */
7776       PetscCall(MatSeqAIJSetPreallocation(Gmat,0,d_nnz));
7777       PetscCall(MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz));
7778       PetscCall(PetscFree2(d_nnz,o_nnz));
7779       for (Ii = Istart; Ii < Iend; Ii++) {
7780         PetscInt dest_row = Ii/bs;
7781         PetscCall(MatGetRow(Amat,Ii,&ncols,&idx,&vals));
7782         for (jj=0; jj<ncols; jj++) {
7783           PetscInt    dest_col = idx[jj]/bs;
7784           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
7785           PetscCall(MatSetValues(Gmat,1,&dest_row,1,&dest_col,&sv,ADD_VALUES));
7786         }
7787         PetscCall(MatRestoreRow(Amat,Ii,&ncols,&idx,&vals));
7788       }
7789       PetscCall(MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY));
7790       PetscCall(MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY));
7791     }
7792   } else {
7793     /* TODO GPU: optimization proposal, each class provides a fast implementation of this
7794        procedure via a MatAbs API */
7795     /* just copy scalar matrix & abs() */
7796     PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
7797     if (isseqaij) { a = Gmat; b = NULL; }
7798     else {
7799       Mat_MPIAIJ *d = (Mat_MPIAIJ*)Gmat->data;
7800       a = d->A; b = d->B;
7801     }
7802     /* abs */
7803     for (c=a, kk=0 ; c && kk<2 ; c=b, kk++) {
7804       MatInfo     info;
7805       PetscScalar *avals;
7806       PetscCall(MatGetInfo(c,MAT_LOCAL,&info));
7807       PetscCall(MatSeqAIJGetArray(c,&avals));
7808       for (int jj = 0; jj<info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
7809       PetscCall(MatSeqAIJRestoreArray(c,&avals));
7810     }
7811   }
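  /* At this point Gmat holds nonnegative edge weights. The code below optionally symmetrizes it,
     Gmat += Gmat^T, and (with scale) rescales it as D^{-1/2} Gmat D^{-1/2} with D = diag(Gmat),
     so that the diagonal entries become 1 or -1 */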
7812   if (symmetrize) {
7813     PetscBool issym;
7814     PetscCall(MatGetOption(Amat,MAT_SYMMETRIC,&issym));
7815     if (!issym) {
7816       Mat matTrans;
7817       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
7818       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
7819       PetscCall(MatDestroy(&matTrans));
7820     }
7821     PetscCall(MatSetOption(Gmat,MAT_SYMMETRIC,PETSC_TRUE));
7822   } else {
7823     PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
7824   }
7825   if (scale) {
7826     /* scale Gmat so that all diagonal values are 1 or -1 */
7827     Vec diag;
7828     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
7829     PetscCall(MatGetDiagonal(Gmat, diag));
7830     PetscCall(VecReciprocal(diag));
7831     PetscCall(VecSqrtAbs(diag));
7832     PetscCall(MatDiagonalScale(Gmat, diag, diag));
7833     PetscCall(VecDestroy(&diag));
7834   }
7835   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
7836   *a_Gmat = Gmat;
7837   PetscFunctionReturn(0);
7838 }
7839
7840 /* -------------------------------------------------------------------------- */
7841 /*@C
7842    MatFilter_AIJ - filters out entries of the graph with small absolute value.
7843    With vfilter < 0 it does nothing, so it should not be called in that case.
7844
7845    Collective on Mat
7846
7847    Input Parameters:
7848 +  Gmat - the graph
7849 -  vfilter - threshold parameter in [0,1)
7850
7851    Output Parameter:
7852 .  filteredG - output filtered scalar graph
7853
7854    Level: developer
7855
7856    Notes:
7857    This is called before the graph coarseners are called.
7858    This could go into Mat; move 'symm' to GAMG
7859
7860 .seealso: `PCGAMGSetThreshold()`
7861 @*/
7862 PETSC_INTERN PetscErrorCode MatFilter_AIJ(Mat Gmat,PetscReal vfilter, Mat *filteredG)
7863 {
7864   PetscInt          Istart,Iend,ncols,nnz0,nnz1, NN, MM, nloc;
7865   Mat               tGmat;
7866   MPI_Comm          comm;
7867   const PetscScalar *vals;
7868   const PetscInt    *idx;
7869   PetscInt          *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols=0;
7870   MatScalar         *AA; // this is checked in graph
7871   PetscBool         isseqaij;
7872   Mat               a, b, c;
7873   MatType           jtype;
7874
7875   PetscFunctionBegin;
7876   PetscCall(PetscObjectGetComm((PetscObject)Gmat,&comm));
7877   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat,MATSEQAIJ,&isseqaij));
7878   PetscCall(MatGetType(Gmat,&jtype));
7879   PetscCall(MatCreate(comm, &tGmat));
7880   PetscCall(MatSetType(tGmat, jtype));
7881
7882   /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold?
7883      Also, if the matrix is symmetric, can we skip this
7884      operation? It can be very expensive on large matrices.
*/
7885
7886   // global sizes
7887   PetscCall(MatGetSize(Gmat, &MM, &NN));
7888   PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend));
7889   nloc = Iend - Istart;
7890   PetscCall(PetscMalloc2(nloc, &d_nnz,nloc, &o_nnz));
7891   if (isseqaij) { a = Gmat; b = NULL; }
7892   else {
7893     Mat_MPIAIJ *d = (Mat_MPIAIJ*)Gmat->data;
7894     a = d->A; b = d->B;
7895     garray = d->garray;
7896   }
7897   /* Determine upper bound on the number of non-zeros needed in the new filtered matrix */
7898   for (PetscInt row=0; row < nloc; row++) {
7899     PetscCall(MatGetRow(a,row,&ncols,NULL,NULL));
7900     d_nnz[row] = ncols;
7901     if (ncols>maxcols) maxcols=ncols;
7902     PetscCall(MatRestoreRow(a,row,&ncols,NULL,NULL));
7903   }
7904   if (b) {
7905     for (PetscInt row=0; row < nloc; row++) {
7906       PetscCall(MatGetRow(b,row,&ncols,NULL,NULL));
7907       o_nnz[row] = ncols;
7908       if (ncols>maxcols) maxcols=ncols;
7909       PetscCall(MatRestoreRow(b,row,&ncols,NULL,NULL));
7910     }
7911   }
7912   PetscCall(MatSetSizes(tGmat,nloc,nloc,MM,MM));
7913   PetscCall(MatSetBlockSizes(tGmat, 1, 1));
7914   PetscCall(MatSeqAIJSetPreallocation(tGmat,0,d_nnz));
7915   PetscCall(MatMPIAIJSetPreallocation(tGmat,0,d_nnz,0,o_nnz));
7916   PetscCall(MatSetOption(tGmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
7917   PetscCall(PetscFree2(d_nnz,o_nnz));
7918   //
7919   PetscCall(PetscMalloc2(maxcols, &AA,maxcols, &AJ));
7920   nnz0 = nnz1 = 0;
7921   for (c=a, kk=0 ; c && kk<2 ; c=b, kk++) {
7922     for (PetscInt row=0, grow=Istart, ncol_row, jj ; row < nloc; row++,grow++) {
7923       PetscCall(MatGetRow(c,row,&ncols,&idx,&vals));
7924       for (ncol_row=jj=0; jj<ncols; jj++,nnz0++) {
7925         PetscScalar sv = PetscAbs(PetscRealPart(vals[jj]));
7926         if (PetscRealPart(sv) > vfilter) {
7927           nnz1++;
7928           PetscInt cid = idx[jj] + Istart; // diag
7929           if (c!=a) cid = garray[idx[jj]];
7930           AA[ncol_row] = vals[jj];
7931           AJ[ncol_row] = cid;
7932           ncol_row++;
7933         }
7934       }
7935       PetscCall(MatRestoreRow(c,row,&ncols,&idx,&vals));
7936       PetscCall(MatSetValues(tGmat,1,&grow,ncol_row,AJ,AA,INSERT_VALUES));
7937     }
7938   }
7939   PetscCall(PetscFree2(AA,AJ));
7940   PetscCall(MatAssemblyBegin(tGmat,MAT_FINAL_ASSEMBLY));
7941   PetscCall(MatAssemblyEnd(tGmat,MAT_FINAL_ASSEMBLY));
7942   PetscCall(MatPropagateSymmetryOptions(Gmat,tGmat)); /* normal Mat options are not relevant? */
7943
7944   PetscCall(PetscInfo(tGmat,"\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n",
7945                       (!nnz0) ? 1. : 100.*(double)nnz1/(double)nnz0, (double)vfilter,
7946                       (!nloc) ? 1. : (double)nnz0/(double)nloc,MM,(int)maxcols));
7947
7948   *filteredG = tGmat;
7949   PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view"));
7950   PetscFunctionReturn(0);
7951 }
7952
7953 /*
7954    Special version for direct calls from Fortran
7955 */
7956 #include <petsc/private/fortranimpl.h>
7957
7958 /* Change these macros so they can be used in a void function */
7959 /* Identical to PetscCallVoid, except it assigns to *_ierr */
7960 #undef PetscCall
7961 #define PetscCall(...) do { \
7962     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
7963     if (PetscUnlikely(ierr_msv_mpiaij)) { \
7964       *_ierr = PetscError(PETSC_COMM_SELF,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr_msv_mpiaij,PETSC_ERROR_REPEAT," "); \
7965       return; \
7966     } \
7967   } while (0)
7968
7969 #undef SETERRQ
7970 #define SETERRQ(comm,ierr,...)
do { \ 7971 *_ierr = PetscError(comm,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr,PETSC_ERROR_INITIAL,__VA_ARGS__); \ 7972 return; \ 7973 } while (0) 7974 7975 #if defined(PETSC_HAVE_FORTRAN_CAPS) 7976 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 7977 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 7978 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 7979 #else 7980 #endif 7981 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 7982 { 7983 Mat mat = *mmat; 7984 PetscInt m = *mm, n = *mn; 7985 InsertMode addv = *maddv; 7986 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 7987 PetscScalar value; 7988 7989 MatCheckPreallocated(mat,1); 7990 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 7991 else PetscCheck(mat->insertmode == addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 7992 { 7993 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 7994 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 7995 PetscBool roworiented = aij->roworiented; 7996 7997 /* Some Variables required in the macro */ 7998 Mat A = aij->A; 7999 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 8000 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 8001 MatScalar *aa; 8002 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 8003 Mat B = aij->B; 8004 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 8005 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 8006 MatScalar *ba; 8007 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 8008 * cannot use "#if defined" inside a macro. 
*/ 8009 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 8010 8011 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 8012 PetscInt nonew = a->nonew; 8013 MatScalar *ap1,*ap2; 8014 8015 PetscFunctionBegin; 8016 PetscCall(MatSeqAIJGetArray(A,&aa)); 8017 PetscCall(MatSeqAIJGetArray(B,&ba)); 8018 for (i=0; i<m; i++) { 8019 if (im[i] < 0) continue; 8020 PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1); 8021 if (im[i] >= rstart && im[i] < rend) { 8022 row = im[i] - rstart; 8023 lastcol1 = -1; 8024 rp1 = aj + ai[row]; 8025 ap1 = aa + ai[row]; 8026 rmax1 = aimax[row]; 8027 nrow1 = ailen[row]; 8028 low1 = 0; 8029 high1 = nrow1; 8030 lastcol2 = -1; 8031 rp2 = bj + bi[row]; 8032 ap2 = ba + bi[row]; 8033 rmax2 = bimax[row]; 8034 nrow2 = bilen[row]; 8035 low2 = 0; 8036 high2 = nrow2; 8037 8038 for (j=0; j<n; j++) { 8039 if (roworiented) value = v[i*n+j]; 8040 else value = v[i+j*m]; 8041 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 8042 if (in[j] >= cstart && in[j] < cend) { 8043 col = in[j] - cstart; 8044 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 8045 } else if (in[j] < 0) continue; 8046 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 8047 /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */ 8048 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1); 8049 } else { 8050 if (mat->was_assembled) { 8051 if (!aij->colmap) { 8052 PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 8053 } 8054 #if defined(PETSC_USE_CTABLE) 8055 PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col)); 8056 col--; 8057 #else 8058 col = aij->colmap[in[j]] - 1; 8059 #endif 8060 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 8061 PetscCall(MatDisAssemble_MPIAIJ(mat)); 8062 col = in[j]; 8063 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 8064 B = aij->B; 8065 b = (Mat_SeqAIJ*)B->data; 8066 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 8067 rp2 = bj + bi[row]; 8068 ap2 = ba + bi[row]; 8069 rmax2 = bimax[row]; 8070 nrow2 = bilen[row]; 8071 low2 = 0; 8072 high2 = nrow2; 8073 bm = aij->B->rmap->n; 8074 ba = b->a; 8075 inserted = PETSC_FALSE; 8076 } 8077 } else col = in[j]; 8078 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 8079 } 8080 } 8081 } else if (!aij->donotstash) { 8082 if (roworiented) { 8083 PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8084 } else { 8085 PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8086 } 8087 } 8088 } 8089 PetscCall(MatSeqAIJRestoreArray(A,&aa)); 8090 PetscCall(MatSeqAIJRestoreArray(B,&ba)); 8091 } 8092 PetscFunctionReturnVoid(); 8093 } 8094 8095 /* Undefining these here since they were redefined from their original definition above! No 8096 * other PETSc functions should be defined past this point, as it is impossible to recover the 8097 * original definitions */ 8098 #undef PetscCall 8099 #undef SETERRQ 8100
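
/*
   Illustrative usage sketch (for documentation only; it is not compiled as part of this file,
   since PetscCall() has just been undefined). The COO-based backend above may be selected when
   the product operands are device matrices of the same type, e.g. MATMPIAIJCUSPARSE or
   MATMPIAIJKOKKOS (an assumed setup, not taken from this file):

     Mat C;
     PetscCall(MatProductCreate(A,P,NULL,&C));        // A and P assumed to be MATMPIAIJCUSPARSE
     PetscCall(MatProductSetType(C,MATPRODUCT_PtAP)); // MATPRODUCT_AB and MATPRODUCT_AtB also supported
     PetscCall(MatProductSetFromOptions(C));
     PetscCall(MatProductSymbolic(C));                // may run MatProductSymbolic_MPIAIJBACKEND()
     PetscCall(MatProductNumeric(C));

   The CPU fallback can be requested with -mat_product_algorithm_backend_cpu (or with
   -matptap_backend_cpu when the product is created through MatPtAP()), as handled in
   MatProductSetFromOptions_MPIAIJBACKEND().
*/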