1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/sfimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 /*MC 10 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 11 12 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 13 and MATMPIAIJ otherwise. As a result, for single process communicators, 14 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation() is supported 15 for communicators controlling multiple processes. It is recommended that you call both of 16 the above preallocation routines for simplicity. 17 18 Options Database Keys: 19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 20 21 Developer Notes: 22 Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when 23 enough exist. 24 25 Level: beginner 26 27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ 28 M*/ 29 30 /*MC 31 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 32 33 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 34 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 35 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 36 for communicators controlling multiple processes. It is recommended that you call both of 37 the above preallocation routines for simplicity. 38 39 Options Database Keys: 40 . 
-mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 41 42 Level: beginner 43 44 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 45 M*/ 46 47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg) 48 { 49 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 50 PetscErrorCode ierr; 51 52 PetscFunctionBegin; 53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL) 54 A->boundtocpu = flg; 55 #endif 56 if (a->A) { 57 ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr); 58 } 59 if (a->B) { 60 ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr); 61 } 62 PetscFunctionReturn(0); 63 } 64 65 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 66 { 67 PetscErrorCode ierr; 68 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 69 70 PetscFunctionBegin; 71 if (mat->A) { 72 ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr); 73 ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr); 74 } 75 PetscFunctionReturn(0); 76 } 77 78 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 79 { 80 PetscErrorCode ierr; 81 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 82 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 83 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 84 const PetscInt *ia,*ib; 85 const MatScalar *aa,*bb,*aav,*bav; 86 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 87 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 88 89 PetscFunctionBegin; 90 *keptrows = NULL; 91 92 ia = a->i; 93 ib = b->i; 94 ierr = MatSeqAIJGetArrayRead(mat->A,&aav);CHKERRQ(ierr); 95 ierr = MatSeqAIJGetArrayRead(mat->B,&bav);CHKERRQ(ierr); 96 for (i=0; i<m; i++) { 97 na = ia[i+1] - ia[i]; 98 nb = ib[i+1] - ib[i]; 99 if (!na && !nb) { 100 cnt++; 101 goto ok1; 102 } 103 aa = aav + ia[i]; 104 for (j=0; j<na; j++) { 105 if (aa[j] != 0.0) goto ok1; 106 } 107 bb = bav + ib[i]; 108 for (j=0; j <nb; j++) { 109 if (bb[j] != 0.0) goto ok1; 110 } 111 cnt++; 112 ok1:; 113 } 114 ierr = 
MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRMPI(ierr); 115 if (!n0rows) { 116 ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr); 117 ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 121 cnt = 0; 122 for (i=0; i<m; i++) { 123 na = ia[i+1] - ia[i]; 124 nb = ib[i+1] - ib[i]; 125 if (!na && !nb) continue; 126 aa = aav + ia[i]; 127 for (j=0; j<na;j++) { 128 if (aa[j] != 0.0) { 129 rows[cnt++] = rstart + i; 130 goto ok2; 131 } 132 } 133 bb = bav + ib[i]; 134 for (j=0; j<nb; j++) { 135 if (bb[j] != 0.0) { 136 rows[cnt++] = rstart + i; 137 goto ok2; 138 } 139 } 140 ok2:; 141 } 142 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 143 ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr); 144 ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr); 145 PetscFunctionReturn(0); 146 } 147 148 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 149 { 150 PetscErrorCode ierr; 151 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 152 PetscBool cong; 153 154 PetscFunctionBegin; 155 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 156 if (Y->assembled && cong) { 157 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 158 } else { 159 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 160 } 161 PetscFunctionReturn(0); 162 } 163 164 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 165 { 166 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 167 PetscErrorCode ierr; 168 PetscInt i,rstart,nrows,*rows; 169 170 PetscFunctionBegin; 171 *zrows = NULL; 172 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 173 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 174 for (i=0; i<nrows; i++) rows[i] += rstart; 175 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 176 
PetscFunctionReturn(0); 177 } 178 179 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 180 { 181 PetscErrorCode ierr; 182 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 183 PetscInt i,n,*garray = aij->garray; 184 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 185 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 186 PetscReal *work; 187 const PetscScalar *dummy; 188 189 PetscFunctionBegin; 190 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 191 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 192 ierr = MatSeqAIJGetArrayRead(aij->A,&dummy);CHKERRQ(ierr); 193 ierr = MatSeqAIJRestoreArrayRead(aij->A,&dummy);CHKERRQ(ierr); 194 ierr = MatSeqAIJGetArrayRead(aij->B,&dummy);CHKERRQ(ierr); 195 ierr = MatSeqAIJRestoreArrayRead(aij->B,&dummy);CHKERRQ(ierr); 196 if (type == NORM_2) { 197 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 198 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 199 } 200 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 201 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 202 } 203 } else if (type == NORM_1) { 204 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 205 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 206 } 207 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 208 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 209 } 210 } else if (type == NORM_INFINITY) { 211 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 212 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 213 } 214 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 215 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 216 } 217 218 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 219 if (type == NORM_INFINITY) { 220 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 221 } else { 222 ierr = 
MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 223 } 224 ierr = PetscFree(work);CHKERRQ(ierr); 225 if (type == NORM_2) { 226 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 227 } 228 PetscFunctionReturn(0); 229 } 230 231 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 232 { 233 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 234 IS sis,gis; 235 PetscErrorCode ierr; 236 const PetscInt *isis,*igis; 237 PetscInt n,*iis,nsis,ngis,rstart,i; 238 239 PetscFunctionBegin; 240 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 241 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 242 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 243 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 244 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 245 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 246 247 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 248 ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr); 249 ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr); 250 n = ngis + nsis; 251 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 252 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 253 for (i=0; i<n; i++) iis[i] += rstart; 254 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 255 256 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 257 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 258 ierr = ISDestroy(&sis);CHKERRQ(ierr); 259 ierr = ISDestroy(&gis);CHKERRQ(ierr); 260 PetscFunctionReturn(0); 261 } 262 263 /* 264 Local utility routine that creates a mapping from the global column 265 number to the local number in the off-diagonal part of the local 266 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 267 a slightly higher hash table cost; without it it is not scalable (each processor 268 has an order N integer array but is fast to access. 
269 */ 270 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 271 { 272 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 273 PetscErrorCode ierr; 274 PetscInt n = aij->B->cmap->n,i; 275 276 PetscFunctionBegin; 277 if (n && !aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 278 #if defined(PETSC_USE_CTABLE) 279 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 280 for (i=0; i<n; i++) { 281 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 282 } 283 #else 284 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 285 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 286 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 287 #endif 288 PetscFunctionReturn(0); 289 } 290 291 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 292 { \ 293 if (col <= lastcol1) low1 = 0; \ 294 else high1 = nrow1; \ 295 lastcol1 = col;\ 296 while (high1-low1 > 5) { \ 297 t = (low1+high1)/2; \ 298 if (rp1[t] > col) high1 = t; \ 299 else low1 = t; \ 300 } \ 301 for (_i=low1; _i<high1; _i++) { \ 302 if (rp1[_i] > col) break; \ 303 if (rp1[_i] == col) { \ 304 if (addv == ADD_VALUES) { \ 305 ap1[_i] += value; \ 306 /* Not sure LogFlops will slow dow the code or not */ \ 307 (void)PetscLogFlops(1.0); \ 308 } \ 309 else ap1[_i] = value; \ 310 inserted = PETSC_TRUE; \ 311 goto a_noinsert; \ 312 } \ 313 } \ 314 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 315 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 316 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 317 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 318 N = nrow1++ - 1; a->nz++; high1++; \ 319 /* shift up all the later entries in this row */ \ 320 
ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\ 321 ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\ 322 rp1[_i] = col; \ 323 ap1[_i] = value; \ 324 A->nonzerostate++;\ 325 a_noinsert: ; \ 326 ailen[row] = nrow1; \ 327 } 328 329 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 330 { \ 331 if (col <= lastcol2) low2 = 0; \ 332 else high2 = nrow2; \ 333 lastcol2 = col; \ 334 while (high2-low2 > 5) { \ 335 t = (low2+high2)/2; \ 336 if (rp2[t] > col) high2 = t; \ 337 else low2 = t; \ 338 } \ 339 for (_i=low2; _i<high2; _i++) { \ 340 if (rp2[_i] > col) break; \ 341 if (rp2[_i] == col) { \ 342 if (addv == ADD_VALUES) { \ 343 ap2[_i] += value; \ 344 (void)PetscLogFlops(1.0); \ 345 } \ 346 else ap2[_i] = value; \ 347 inserted = PETSC_TRUE; \ 348 goto b_noinsert; \ 349 } \ 350 } \ 351 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 352 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 353 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 354 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 355 N = nrow2++ - 1; b->nz++; high2++; \ 356 /* shift up all the later entries in this row */ \ 357 ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\ 358 ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\ 359 rp2[_i] = col; \ 360 ap2[_i] = value; \ 361 B->nonzerostate++; \ 362 b_noinsert: ; \ 363 bilen[row] = nrow2; \ 364 } 365 366 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 367 { 368 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 369 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 370 PetscErrorCode ierr; 371 PetscInt l,*garray = mat->garray,diag; 372 373 PetscFunctionBegin; 374 /* code only works for square matrices A */ 375 376 /* find size of row to the left of the 
diagonal part */ 377 ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr); 378 row = row - diag; 379 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 380 if (garray[b->j[b->i[row]+l]] > diag) break; 381 } 382 ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr); 383 384 /* diagonal part */ 385 ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr); 386 387 /* right of diagonal part */ 388 ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr); 389 #if defined(PETSC_HAVE_DEVICE) 390 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU; 391 #endif 392 PetscFunctionReturn(0); 393 } 394 395 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 396 { 397 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 398 PetscScalar value = 0.0; 399 PetscErrorCode ierr; 400 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 401 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 402 PetscBool roworiented = aij->roworiented; 403 404 /* Some Variables required in the macro */ 405 Mat A = aij->A; 406 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 407 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 408 PetscBool ignorezeroentries = a->ignorezeroentries; 409 Mat B = aij->B; 410 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 411 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 412 MatScalar *aa,*ba; 413 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 414 * cannot use "#if defined" inside a macro. 
*/ 415 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 416 417 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 418 PetscInt nonew; 419 MatScalar *ap1,*ap2; 420 421 PetscFunctionBegin; 422 #if defined(PETSC_HAVE_DEVICE) 423 if (A->offloadmask == PETSC_OFFLOAD_GPU) { 424 const PetscScalar *dummy; 425 ierr = MatSeqAIJGetArrayRead(A,&dummy);CHKERRQ(ierr); 426 ierr = MatSeqAIJRestoreArrayRead(A,&dummy);CHKERRQ(ierr); 427 } 428 if (B->offloadmask == PETSC_OFFLOAD_GPU) { 429 const PetscScalar *dummy; 430 ierr = MatSeqAIJGetArrayRead(B,&dummy);CHKERRQ(ierr); 431 ierr = MatSeqAIJRestoreArrayRead(B,&dummy);CHKERRQ(ierr); 432 } 433 #endif 434 aa = a->a; 435 ba = b->a; 436 for (i=0; i<m; i++) { 437 if (im[i] < 0) continue; 438 if (PetscUnlikely(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 439 if (im[i] >= rstart && im[i] < rend) { 440 row = im[i] - rstart; 441 lastcol1 = -1; 442 rp1 = aj + ai[row]; 443 ap1 = aa + ai[row]; 444 rmax1 = aimax[row]; 445 nrow1 = ailen[row]; 446 low1 = 0; 447 high1 = nrow1; 448 lastcol2 = -1; 449 rp2 = bj + bi[row]; 450 ap2 = ba + bi[row]; 451 rmax2 = bimax[row]; 452 nrow2 = bilen[row]; 453 low2 = 0; 454 high2 = nrow2; 455 456 for (j=0; j<n; j++) { 457 if (v) value = roworiented ? 
v[i*n+j] : v[i+j*m]; 458 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 459 if (in[j] >= cstart && in[j] < cend) { 460 col = in[j] - cstart; 461 nonew = a->nonew; 462 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 463 #if defined(PETSC_HAVE_DEVICE) 464 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 465 #endif 466 } else if (in[j] < 0) continue; 467 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 468 else { 469 if (mat->was_assembled) { 470 if (!aij->colmap) { 471 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 472 } 473 #if defined(PETSC_USE_CTABLE) 474 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 475 col--; 476 #else 477 col = aij->colmap[in[j]] - 1; 478 #endif 479 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 480 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 481 col = in[j]; 482 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 483 B = aij->B; 484 b = (Mat_SeqAIJ*)B->data; 485 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 486 rp2 = bj + bi[row]; 487 ap2 = ba + bi[row]; 488 rmax2 = bimax[row]; 489 nrow2 = bilen[row]; 490 low2 = 0; 491 high2 = nrow2; 492 bm = aij->B->rmap->n; 493 ba = b->a; 494 inserted = PETSC_FALSE; 495 } else if (col < 0 && !(ignorezeroentries && value == 0.0)) { 496 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 497 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 498 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 499 } 500 } else col = in[j]; 501 nonew = b->nonew; 502 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 503 #if 
defined(PETSC_HAVE_DEVICE) 504 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 505 #endif 506 } 507 } 508 } else { 509 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 510 if (!aij->donotstash) { 511 mat->assembled = PETSC_FALSE; 512 if (roworiented) { 513 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 514 } else { 515 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 516 } 517 } 518 } 519 } 520 PetscFunctionReturn(0); 521 } 522 523 /* 524 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 525 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 526 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 
527 */ 528 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[]) 529 { 530 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 531 Mat A = aij->A; /* diagonal part of the matrix */ 532 Mat B = aij->B; /* offdiagonal part of the matrix */ 533 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 534 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 535 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,col; 536 PetscInt *ailen = a->ilen,*aj = a->j; 537 PetscInt *bilen = b->ilen,*bj = b->j; 538 PetscInt am = aij->A->rmap->n,j; 539 PetscInt diag_so_far = 0,dnz; 540 PetscInt offd_so_far = 0,onz; 541 542 PetscFunctionBegin; 543 /* Iterate over all rows of the matrix */ 544 for (j=0; j<am; j++) { 545 dnz = onz = 0; 546 /* Iterate over all non-zero columns of the current row */ 547 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 548 /* If column is in the diagonal */ 549 if (mat_j[col] >= cstart && mat_j[col] < cend) { 550 aj[diag_so_far++] = mat_j[col] - cstart; 551 dnz++; 552 } else { /* off-diagonal entries */ 553 bj[offd_so_far++] = mat_j[col]; 554 onz++; 555 } 556 } 557 ailen[j] = dnz; 558 bilen[j] = onz; 559 } 560 PetscFunctionReturn(0); 561 } 562 563 /* 564 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 565 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 566 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 567 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 568 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 
569 */ 570 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[]) 571 { 572 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 573 Mat A = aij->A; /* diagonal part of the matrix */ 574 Mat B = aij->B; /* offdiagonal part of the matrix */ 575 Mat_SeqAIJ *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data; 576 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 577 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 578 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend; 579 PetscInt *ailen = a->ilen,*aj = a->j; 580 PetscInt *bilen = b->ilen,*bj = b->j; 581 PetscInt am = aij->A->rmap->n,j; 582 PetscInt *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */ 583 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 584 PetscScalar *aa = a->a,*ba = b->a; 585 586 PetscFunctionBegin; 587 /* Iterate over all rows of the matrix */ 588 for (j=0; j<am; j++) { 589 dnz_row = onz_row = 0; 590 rowstart_offd = full_offd_i[j]; 591 rowstart_diag = full_diag_i[j]; 592 /* Iterate over all non-zero columns of the current row */ 593 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 594 /* If column is in the diagonal */ 595 if (mat_j[col] >= cstart && mat_j[col] < cend) { 596 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 597 aa[rowstart_diag+dnz_row] = mat_a[col]; 598 dnz_row++; 599 } else { /* off-diagonal entries */ 600 bj[rowstart_offd+onz_row] = mat_j[col]; 601 ba[rowstart_offd+onz_row] = mat_a[col]; 602 onz_row++; 603 } 604 } 605 ailen[j] = dnz_row; 606 bilen[j] = onz_row; 607 } 608 PetscFunctionReturn(0); 609 } 610 611 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 612 { 613 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 614 PetscErrorCode ierr; 615 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 616 PetscInt cstart = mat->cmap->rstart,cend = 
mat->cmap->rend,row,col; 617 618 PetscFunctionBegin; 619 for (i=0; i<m; i++) { 620 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 621 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 622 if (idxm[i] >= rstart && idxm[i] < rend) { 623 row = idxm[i] - rstart; 624 for (j=0; j<n; j++) { 625 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 626 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 627 if (idxn[j] >= cstart && idxn[j] < cend) { 628 col = idxn[j] - cstart; 629 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 630 } else { 631 if (!aij->colmap) { 632 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 633 } 634 #if defined(PETSC_USE_CTABLE) 635 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 636 col--; 637 #else 638 col = aij->colmap[idxn[j]] - 1; 639 #endif 640 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 641 else { 642 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 643 } 644 } 645 } 646 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 647 } 648 PetscFunctionReturn(0); 649 } 650 651 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 652 { 653 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 654 PetscErrorCode ierr; 655 PetscInt nstash,reallocs; 656 657 PetscFunctionBegin; 658 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 659 660 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 661 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 662 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 663 
PetscFunctionReturn(0); 664 } 665 666 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 667 { 668 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 669 PetscErrorCode ierr; 670 PetscMPIInt n; 671 PetscInt i,j,rstart,ncols,flg; 672 PetscInt *row,*col; 673 PetscBool other_disassembled; 674 PetscScalar *val; 675 676 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 677 678 PetscFunctionBegin; 679 if (!aij->donotstash && !mat->nooffprocentries) { 680 while (1) { 681 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 682 if (!flg) break; 683 684 for (i=0; i<n;) { 685 /* Now identify the consecutive vals belonging to the same row */ 686 for (j=i,rstart=row[j]; j<n; j++) { 687 if (row[j] != rstart) break; 688 } 689 if (j < n) ncols = j-i; 690 else ncols = n-i; 691 /* Now assemble all these values with a single function call */ 692 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 693 i = j; 694 } 695 } 696 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 697 } 698 #if defined(PETSC_HAVE_DEVICE) 699 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 700 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 701 if (mat->boundtocpu) { 702 ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr); 703 ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr); 704 } 705 #endif 706 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 707 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 708 709 /* determine if any processor has disassembled, if so we must 710 also disassemble ourself, in order that we may reassemble. 
*/ 711 /* 712 if nonzero structure of submatrix B cannot change then we know that 713 no processor disassembled thus we can skip this stuff 714 */ 715 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 716 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 717 if (mat->was_assembled && !other_disassembled) { 718 #if defined(PETSC_HAVE_DEVICE) 719 aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */ 720 #endif 721 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 722 } 723 } 724 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 725 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 726 } 727 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 728 #if defined(PETSC_HAVE_DEVICE) 729 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 730 #endif 731 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 732 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 733 734 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 735 736 aij->rowvalues = NULL; 737 738 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 739 740 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 741 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 742 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 743 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 744 } 745 #if defined(PETSC_HAVE_DEVICE) 746 mat->offloadmask = PETSC_OFFLOAD_BOTH; 747 #endif 748 PetscFunctionReturn(0); 749 } 750 751 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 752 { 753 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 754 PetscErrorCode ierr; 755 756 PetscFunctionBegin; 757 ierr = 
MatZeroEntries(l->A);CHKERRQ(ierr); 758 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 759 PetscFunctionReturn(0); 760 } 761 762 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 763 { 764 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 765 PetscObjectState sA, sB; 766 PetscInt *lrows; 767 PetscInt r, len; 768 PetscBool cong, lch, gch; 769 PetscErrorCode ierr; 770 771 PetscFunctionBegin; 772 /* get locally owned rows */ 773 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 774 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 775 /* fix right hand side if needed */ 776 if (x && b) { 777 const PetscScalar *xx; 778 PetscScalar *bb; 779 780 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 781 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 782 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 783 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 784 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 785 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 786 } 787 788 sA = mat->A->nonzerostate; 789 sB = mat->B->nonzerostate; 790 791 if (diag != 0.0 && cong) { 792 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 793 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 794 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 795 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 796 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 797 PetscInt nnwA, nnwB; 798 PetscBool nnzA, nnzB; 799 800 nnwA = aijA->nonew; 801 nnwB = aijB->nonew; 802 nnzA = aijA->keepnonzeropattern; 803 nnzB = aijB->keepnonzeropattern; 804 if (!nnzA) { 805 ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr); 806 aijA->nonew = 0; 807 } 808 if (!nnzB) { 809 ierr = 
PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr); 810 aijB->nonew = 0; 811 } 812 /* Must zero here before the next loop */ 813 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 814 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 815 for (r = 0; r < len; ++r) { 816 const PetscInt row = lrows[r] + A->rmap->rstart; 817 if (row >= A->cmap->N) continue; 818 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 819 } 820 aijA->nonew = nnwA; 821 aijB->nonew = nnwB; 822 } else { 823 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 824 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 825 } 826 ierr = PetscFree(lrows);CHKERRQ(ierr); 827 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 828 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 829 830 /* reduce nonzerostate */ 831 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 832 ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 833 if (gch) A->nonzerostate++; 834 PetscFunctionReturn(0); 835 } 836 837 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 838 { 839 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 840 PetscErrorCode ierr; 841 PetscMPIInt n = A->rmap->n; 842 PetscInt i,j,r,m,len = 0; 843 PetscInt *lrows,*owners = A->rmap->range; 844 PetscMPIInt p = 0; 845 PetscSFNode *rrows; 846 PetscSF sf; 847 const PetscScalar *xx; 848 PetscScalar *bb,*mask; 849 Vec xmask,lmask; 850 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 851 const PetscInt *aj, *ii,*ridx; 852 PetscScalar *aa; 853 854 PetscFunctionBegin; 855 /* Create SF where leaves are input rows and roots are owned rows */ 856 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 857 for (r = 0; r < n; ++r) lrows[r] = -1; 858 
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  /* build a 0/1 column mask and scatter it to ghost (off-diagonal) numbering */
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    /* bring ghost values of x over so eliminated columns can be folded into b */
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* column was zeroed: move its contribution into b before clearing */
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  }
  PetscFunctionReturn(0);
}

/*
  MatMult_MPIAIJ - y = A*x. The scatter of the off-process part of x is started
  first, the local (diagonal-block) product is computed while it is in flight,
  and the off-diagonal-block product is added after the scatter completes.
*/
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Applies only the local (diagonal) block of A; no communication */
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* z = y + A*x, with the same scatter/compute overlap as MatMult_MPIAIJ */
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  y = A^T x: partial products of the off-diagonal block go into the ghost
  vector and are summed into y by a reverse scatter.
*/
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  MatIsTranspose_MPIAIJ - tests whether Bmat equals Amat^T to tolerance tol.
  First the cheap local test on the diagonal blocks (result combined with a
  logical AND across ranks), then, only if that passes, the expensive
  off-diagonal test via submatrix extraction.
*/
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscBool      lf;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRMPI(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix.
*/
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  /* notme = every global row NOT owned by this process */
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  /* compare A(Me,Notme) against B(Notme,Me): equal-to-transpose blockwise */
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* A matrix is symmetric iff it is its own transpose */
PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* z = y + A^T x, same structure as MatMultTranspose_MPIAIJ */
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   This only works correctly for square matrices where the subblock A->A is
the
   diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  /* with matching partitions the global diagonal lives entirely in the local block */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Scales both the diagonal and off-diagonal blocks by aa */
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  MatDestroy_MPIAIJ - releases all storage owned by the parallel AIJ
  implementation (blocks, scatter, colmap, work arrays) and detaches every
  composed method so the object can be retyped or freed cleanly.
*/
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
  ierr =
PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_MKL_SPARSE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);CHKERRQ(ierr);
  /* NOTE(review): "MatConvert_mpiaij_is_C" was already detached above -- this second compose is redundant (harmless) */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  MatView_MPIAIJ_Binary - writes the parallel matrix in PETSc binary format:
  header (classid, M, N, global nz), row lengths, global column indices,
  then values. Per row, the column indices are produced in ascending global
  order by merging the off-diagonal entries (mapped through garray[]) around
  the diagonal-block entries; the value pass repeats the identical merge.
*/
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
  const PetscInt    *garray = aij->garray;
  const PetscScalar *aa,*ba;
  PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
  PetscInt          *rowlens;
  PetscInt          *colidxs;
  PetscScalar       *matvals;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz;

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M; header[2] = N; header[3] = nz;
  /* global nz: sum of the local counts, only meaningful on rank 0 which writes it */
  ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
  ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);

  /* fill in and store row lengths */
  ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
  for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscFree(rowlens);CHKERRQ(ierr);

  /* fill in and store column indices */
  ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
  for (cnt=0, i=0; i<m; i++) {
    /* off-diagonal columns that lie left of the diagonal block */
    for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      colidxs[cnt++] = garray[B->j[jb]];
    }
    /* diagonal-block columns shifted to global numbering */
    for (ja=A->i[i]; ja<A->i[i+1]; ja++)
      colidxs[cnt++] = A->j[ja] + cs;
    /* remaining off-diagonal columns (right of the diagonal block) */
    for (; jb<B->i[i+1]; jb++)
      colidxs[cnt++] = garray[B->j[jb]];
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
  ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscFree(colidxs);CHKERRQ(ierr);

  /* fill in and store nonzero values */
  ierr = MatSeqAIJGetArrayRead(aij->A,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(aij->B,&ba);CHKERRQ(ierr);
  ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
  for (cnt=0, i=0; i<m; i++) {
    /* identical merge order as the column-index pass above */
    for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      matvals[cnt++] = ba[jb];
    }
    for (ja=A->i[i]; ja<A->i[i+1]; ja++)
      matvals[cnt++] = aa[ja];
    for (; jb<B->i[i+1]; jb++)
      matvals[cnt++] = ba[jb];
  }
  ierr = MatSeqAIJRestoreArrayRead(aij->A,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(aij->B,&ba);CHKERRQ(ierr);
  if (cnt != nz)
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1244 ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 1245 ierr = PetscFree(matvals);CHKERRQ(ierr); 1246 1247 /* write block size option to the viewer's .info file */ 1248 ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 1249 PetscFunctionReturn(0); 1250 } 1251 1252 #include <petscdraw.h> 1253 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1254 { 1255 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1256 PetscErrorCode ierr; 1257 PetscMPIInt rank = aij->rank,size = aij->size; 1258 PetscBool isdraw,iascii,isbinary; 1259 PetscViewer sviewer; 1260 PetscViewerFormat format; 1261 1262 PetscFunctionBegin; 1263 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1264 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1265 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1266 if (iascii) { 1267 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1268 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1269 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1270 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1271 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1272 for (i=0; i<(PetscInt)size; i++) { 1273 nmax = PetscMax(nmax,nz[i]); 1274 nmin = PetscMin(nmin,nz[i]); 1275 navg += nz[i]; 1276 } 1277 ierr = PetscFree(nz);CHKERRQ(ierr); 1278 navg = navg/size; 1279 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1280 PetscFunctionReturn(0); 1281 } 1282 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1283 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1284 
MatInfo   info;
      PetscInt  *inodes=NULL;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      /* nothing to print for an unfactored parallel AIJ matrix */
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (iascii && size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow,iscol;

    /* only rank 0 requests any rows/columns; all ranks participate in the extraction */
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
    ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
    ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
/*  The commented code uses MatCreateSubMatrices instead */
/*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
    ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
    if (!rank) {
       ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
       A    = AA[0];
       Av   = AA[0];
    }
    ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
*/
    ierr = ISDestroy(&iscol);CHKERRQ(ierr);
    ierr = ISDestroy(&isrow);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      if (((PetscObject)mat)->name) {
        ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
      }
      ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Top-level MatView for MPIAIJ: all supported viewer types share one worker */
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
  MatSOR_MPIAIJ - block Jacobi with local SOR sweeps: each outer iteration
  scatters the current x, forms bb1 = bb - B*x_ghost, and runs the requested
  local sweep on the diagonal block. Also implements the Eisenstat trick.
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ
*mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = NULL;
  PetscBool      hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* bb1 (modified rhs) is needed unless a single zero-initial-guess sweep suffices */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep needs no ghost update since x starts at zero */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    /* lazily build and cache the diagonal used by the Eisenstat update */
    if (!mat->diag) {
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(&xx1);CHKERRQ(ierr);
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");

  ierr = VecDestroy(&bb1);CHKERRQ(ierr);

  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}

/*
  MatPermute_MPIAIJ - B = P*A*Q for row permutation rowp and column
  permutation colp. The permutations are inverted via PetscSF reductions to
  find each local row/column's destination, nonzero counts are forwarded to
  the destination rows for preallocation, and values are inserted row by row.
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            aA,aB,Aperm;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = NULL;
  PetscBool      done;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);

  /* Invert row permutation to find out where my rows should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
  ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);

  /* Invert column permutation to find out where my columns should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
  ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);

  /* Find out where my gcols should go */
  ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
  ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  /* count diagonal/off-diagonal nonzeros each permuted row will have */
  ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
    for (j=ai[i]; j<ai[i+1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j=bi[i]; j<bi[i+1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* forward the counts to the ranks that own the destination rows */
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);

  ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt j0,rowlen;
    rowlen = ai[i+1] - ai[i];
    for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
    rowlen = bi[i+1] - bi[i];
    for (j0=j=0; j<rowlen; j0=j) {
      for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
  ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
  ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
  ierr = PetscFree(gcdest);CHKERRQ(ierr);
  if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
  *B = Aperm;
  PetscFunctionReturn(0);
}

/*
  MatGetGhosts_MPIAIJ - the ghosts are the global column indices of the
  off-diagonal block; their count is its local column count and garray[]
  holds the local-to-global column map.
*/
PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
  if (ghosts) *ghosts = aij->garray;
  PetscFunctionReturn(0);
}

/*
   MatGetInfo_MPIAIJ - Gathers matrix statistics (nonzeros used/allocated/unneeded, memory,
   mallocs) by summing the local diagonal (A) and off-diagonal (B) blocks, then reducing
   with MPI_MAX or MPI_SUM over the communicator depending on flag.
*/
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscLogDouble isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);

  /* accumulate B's statistics on top of A's */
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/*
   MatSetOption_MPIAIJ - Dispatches a MatOption: most options are forwarded to both the
   diagonal (a->A) and off-diagonal (a->B) sequential blocks; a few set local flags or are
   deliberately ignored/handled by the generic MatSetOption().
*/
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

/*
   MatGetRow_MPIAIJ - Returns one locally owned row (global row index) with global column
   indices and values, merging the diagonal and off-diagonal blocks in ascending column
   order. Only one row may be "active" at a time; MatRestoreRow() must be called after.
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  /* Only request columns/values from the sub-blocks that the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = NULL; pvB = NULL;}
  if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries whose global column precedes the diagonal block come first */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          /* v was not requested, so imark must be recomputed here */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v)   *v   = NULL;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatRestoreRow_MPIAIJ - Companion to MatGetRow_MPIAIJ(); just clears the "row active"
   flag (the merged row buffers are cached on the matrix and freed at destroy time).
*/
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*
   MatNorm_MPIAIJ - Computes NORM_FROBENIUS, NORM_1 (max column sum) or NORM_INFINITY
   (max row sum) of the parallel matrix by combining the local A and B blocks and
   reducing over the communicator. NORM_2 is not supported.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum of |a_ij|^2 over both blocks, then global sum and sqrt */
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
      *norm = PetscSqrtReal(*norm);
      ierr  = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      /* tmp accumulates the per-global-column absolute sums; O(N) storage per process */
      ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
      *norm = 0.0;
      v     = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr =
MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

/*
   MatTranspose_MPIAIJ - Forms the transpose of A. For a new matrix the nonzero counts of
   the transposed diagonal/off-diagonal blocks are first computed (using a PetscSF reduce
   to turn local off-diagonal column counts into remote row counts) so B can be
   preallocated exactly. The diagonal block is transposed in place locally; the
   off-diagonal block is communicated through MatSetValues()/assembly.
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
  Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
  PetscInt        M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
  const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
  PetscErrorCode  ierr;
  Mat             B,A_diag,*B_diag;
  const MatScalar *pbv,*bv;

  PetscFunctionBegin;
  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) { /* e.g. in-place transpose also rebuilds B */
    PetscInt             *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode          *oloc;
    PETSC_UNUSED PetscSF sf;

    ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
    /* compute d_nnz for preallocation */
    ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) {
      d_nnz[aj[i]]++;
    }
    /* compute local off-diagonal contributions */
    ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
    ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
    ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
  } else {
    B    = *matout;
    ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }

  b           = (Mat_MPIAIJ*)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i=0; i<A_diag_ncol; i++) {
    B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
  }

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. */
  ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);

  /* copy over the B part */
  ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(a->B,&bv);CHKERRQ(ierr);
  pbv  = bv;
  row  = A->rmap->rstart;
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    /* each local B row i becomes a (mostly remote) column of the transpose */
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    pbv += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(a->B,&bv);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place: A takes over B's data */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   MatDiagonalScale_MPIAIJ - Computes mat = diag(ll) * mat * diag(rr). Left scaling is
   purely local; right scaling of the off-diagonal block needs the ghosted entries of rr,
   which are scattered while the local blocks are being scaled.
*/
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation.
*/
    ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   MatSetUnfactored_MPIAIJ - Marks the diagonal block as unfactored (only a->A carries
   factorization state here).
*/
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatEqual_MPIAIJ - Collective equality test: compares the local diagonal blocks, then
   (only if those matched) the off-diagonal blocks, and combines with a logical-AND
   reduction so all ranks agree on the result.
*/
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  PetscFunctionReturn(0);
}

/*
   MatCopy_MPIAIJ - Copies the values of A into B. The fast per-block path is only valid
   for identical nonzero structure AND matching copy implementations; otherwise the
   generic MatCopy_Basic() is used.
*/
PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatSetUp_MPIAIJ - Default setup: preallocate with PETSC_DEFAULT row lengths.
*/
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Computes the number of nonzeros per row needed for preallocation when X and Y
   have different nonzero structure.
2056 */ 2057 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2058 { 2059 PetscInt i,j,k,nzx,nzy; 2060 2061 PetscFunctionBegin; 2062 /* Set the number of nonzeros in the new matrix */ 2063 for (i=0; i<m; i++) { 2064 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2065 nzx = xi[i+1] - xi[i]; 2066 nzy = yi[i+1] - yi[i]; 2067 nnz[i] = 0; 2068 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2069 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2070 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2071 nnz[i]++; 2072 } 2073 for (; k<nzy; k++) nnz[i]++; 2074 } 2075 PetscFunctionReturn(0); 2076 } 2077 2078 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2079 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2080 { 2081 PetscErrorCode ierr; 2082 PetscInt m = Y->rmap->N; 2083 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2084 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2085 2086 PetscFunctionBegin; 2087 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2088 PetscFunctionReturn(0); 2089 } 2090 2091 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2092 { 2093 PetscErrorCode ierr; 2094 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2095 2096 PetscFunctionBegin; 2097 if (str == SAME_NONZERO_PATTERN) { 2098 ierr = MatAXPY(yy->A,a,xx->A,str);CHKERRQ(ierr); 2099 ierr = MatAXPY(yy->B,a,xx->B,str);CHKERRQ(ierr); 2100 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2101 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2102 } else { 2103 Mat B; 2104 PetscInt *nnz_d,*nnz_o; 2105 2106 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2107 ierr = 
PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2108 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2109 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2110 ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr); 2111 ierr = MatSetType(B,((PetscObject)Y)->type_name);CHKERRQ(ierr); 2112 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2113 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2114 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2115 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2116 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2117 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2118 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2119 } 2120 PetscFunctionReturn(0); 2121 } 2122 2123 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2124 2125 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2126 { 2127 #if defined(PETSC_USE_COMPLEX) 2128 PetscErrorCode ierr; 2129 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2130 2131 PetscFunctionBegin; 2132 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2133 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2134 #else 2135 PetscFunctionBegin; 2136 #endif 2137 PetscFunctionReturn(0); 2138 } 2139 2140 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2141 { 2142 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2143 PetscErrorCode ierr; 2144 2145 PetscFunctionBegin; 2146 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2147 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2148 PetscFunctionReturn(0); 2149 } 2150 2151 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2152 { 2153 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2154 PetscErrorCode ierr; 2155 2156 PetscFunctionBegin; 2157 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2158 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2159 PetscFunctionReturn(0); 2160 } 2161 2162 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2163 { 2164 Mat_MPIAIJ *a = 
(Mat_MPIAIJ*)A->data; 2165 PetscErrorCode ierr; 2166 PetscInt i,*idxb = NULL,m = A->rmap->n; 2167 PetscScalar *va,*vv; 2168 Vec vB,vA; 2169 const PetscScalar *vb; 2170 2171 PetscFunctionBegin; 2172 ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr); 2173 ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr); 2174 2175 ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr); 2176 if (idx) { 2177 for (i=0; i<m; i++) { 2178 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2179 } 2180 } 2181 2182 ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr); 2183 ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr); 2184 ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr); 2185 2186 ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr); 2187 ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr); 2188 for (i=0; i<m; i++) { 2189 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2190 vv[i] = vb[i]; 2191 if (idx) idx[i] = a->garray[idxb[i]]; 2192 } else { 2193 vv[i] = va[i]; 2194 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) 2195 idx[i] = a->garray[idxb[i]]; 2196 } 2197 } 2198 ierr = VecRestoreArrayWrite(vA,&vv);CHKERRQ(ierr); 2199 ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr); 2200 ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr); 2201 ierr = PetscFree(idxb);CHKERRQ(ierr); 2202 ierr = VecDestroy(&vA);CHKERRQ(ierr); 2203 ierr = VecDestroy(&vB);CHKERRQ(ierr); 2204 PetscFunctionReturn(0); 2205 } 2206 2207 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2208 { 2209 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2210 PetscInt m = A->rmap->n,n = A->cmap->n; 2211 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2212 PetscInt *cmap = mat->garray; 2213 PetscInt *diagIdx, *offdiagIdx; 2214 Vec diagV, offdiagV; 2215 PetscScalar *a, *diagA, *offdiagA; 2216 const PetscScalar *ba,*bav; 2217 PetscInt r,j,col,ncols,*bi,*bj; 2218 PetscErrorCode ierr; 2219 Mat B = mat->B; 2220 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2221 2222 
  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
    ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr);
    ierr = VecDestroy(&diagV);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (n == 0) {
    if (m) {
      /* this rank owns rows but no columns: every row's min-abs is an implicit 0.0 */
      ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
      for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
      ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  }

  ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr);

  /* Get offdiagIdx[] for implicit 0.0 */
  ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
  ba   = bav;
  bi   = b->i;
  bj   = b->j;
  ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so the row has at least one implicit 0.0; min |.| starts at 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the explicit off-diagonal entries of this row for a smaller |value| */
    for (j=0; j<ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* merge the diagonal-block and off-diagonal-block minima; ties pick the smaller column */
  ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatGetRowMin_MPIAIJ - For each local row, returns in v the minimum entry (compared by
   real part) and optionally its global column in idx[]. Same structure as
   MatGetRowMinAbs_MPIAIJ, but implicit zeros compete as value 0.0 rather than |0.0|.
*/
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  PetscErrorCode    ierr;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
    ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr);
    ierr = VecDestroy(&diagV);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (n == 0) {
    if (m) {
      ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
      for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
      ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  }

  ierr = PetscCalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);

  /* Get offdiagIdx[] for implicit 0.0 */
  ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
  ba   = bav;
  bi   = b->i;
  bj   = b->j;
  ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so the row has an implicit 0.0; start the min at 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatGetRowMax_MPIAIJ - For each local row, returns in v the maximum entry (compared by
   real part) and optionally its global column in idx[]. (Continues past the end of this
   chunk.)
*/
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  PetscErrorCode    ierr;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
    ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr);
    ierr = VecDestroy(&diagV);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (n == 0) {
    if (m) {
      ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
      for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
      ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  }

  ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);

  /* Get offdiagIdx[] for implicit 0.0 */
  ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
  ba   = bav;
  bi   = b->i;
  bj   = b->j;
  ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n)
{ 2488 /* a hole is outside compressed Bcols */ 2489 if (ncols == 0) { 2490 if (cstart) { 2491 offdiagIdx[r] = 0; 2492 } else offdiagIdx[r] = cend; 2493 } else { /* ncols > 0 */ 2494 offdiagIdx[r] = cmap[ncols-1] + 1; 2495 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2496 } 2497 } 2498 } 2499 2500 for (j=0; j<ncols; j++) { 2501 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2502 ba++; bj++; 2503 } 2504 } 2505 2506 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2507 ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr); 2508 for (r = 0; r < m; ++r) { 2509 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2510 a[r] = diagA[r]; 2511 if (idx) idx[r] = cstart + diagIdx[r]; 2512 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2513 a[r] = diagA[r]; 2514 if (idx) { 2515 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2516 idx[r] = cstart + diagIdx[r]; 2517 } else idx[r] = offdiagIdx[r]; 2518 } 2519 } else { 2520 a[r] = offdiagA[r]; 2521 if (idx) idx[r] = offdiagIdx[r]; 2522 } 2523 } 2524 ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr); 2525 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2526 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2527 ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr); 2528 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2529 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2530 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2531 PetscFunctionReturn(0); 2532 } 2533 2534 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2535 { 2536 PetscErrorCode ierr; 2537 Mat *dummy; 2538 2539 PetscFunctionBegin; 2540 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2541 *newmat = *dummy; 2542 ierr = PetscFree(dummy);CHKERRQ(ierr); 2543 PetscFunctionReturn(0); 2544 } 2545 2546 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar 
**values)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
  /* propagate any factorization error (e.g. zero pivot) detected in the local block */
  A->factorerrortype = a->A->factorerrortype;
  PetscFunctionReturn(0);
}

/* Fill the matrix with random values; requires the matrix to be preallocated (or assembled).
   For an unassembled matrix the off-diagonal part must skip the local column range. */
static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;

  PetscFunctionBegin;
  if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
  ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
  if (x->assembled) {
    ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
  } else {
    /* B is still indexed by global columns before first assembly: avoid the diagonal range */
    ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Select the scalable or non-scalable MatIncreaseOverlap implementation. */
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
{
  PetscFunctionBegin;
  if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
  else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
  PetscFunctionReturn(0);
}

/*@
   MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap

   Collective on Mat

   Input Parameters:
+    A - the matrix
-    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)

   Level: advanced

@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* PetscTryMethod: a no-op for matrix types that do not provide the method */
  ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Process MPIAIJ-specific options (currently only -mat_increase_overlap_scalable). */
PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
{
  PetscErrorCode ierr;
  PetscBool      sc = PETSC_FALSE,flg;

  PetscFunctionBegin;
  ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
  /* default reflects the currently installed implementation */
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
  }
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Y = Y + a*I.  Ensures the diagonal block has room for diagonal entries before shifting. */
PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
  Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
  } else if (!aij->nz) {
    /* re-preallocate an empty diagonal block, restoring the user's nonew setting */
    PetscInt nonew = aij->nonew;
    ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
    aij->nonew = nonew;
  }
  ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Report whether any locally owned row is missing its diagonal entry; *d (optional)
   receives the global index of the first such row. */
PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
  ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
  if (d) {
    PetscInt rstart;
    /* translate the local row index returned for a->A into a global row index */
    ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
    *d += rstart;
  }
  PetscFunctionReturn(0);
}

/* Invert the variable-size block diagonal: delegated to the local diagonal block a->A. */
PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
{
  Mat_MPIAIJ *a =
(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* Function table for MATMPIAIJ; slots are indexed by the comments, NULL means
   the operation is unimplemented (or installed later, e.g. by subtypes). */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*29*/ MatSetUp_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetRandom_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                /*80*/ NULL,
                                       NULL,
                                       NULL,
                                /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                /*104*/MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*109*/NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*119*/MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                /*124*/MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnNorms_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                /*129*/NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                /*134*/NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*139*/MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                /*145*/NULL,
                                       NULL,
                                       NULL
};

/* ----------------------------------------------------------------------------------------*/

/* Stash the current numerical values of both local blocks (see MatStoreValues()). */
PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Restore the values previously saved with MatStoreValues_MPIAIJ(). */
PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Preallocate the diagonal (d_nz/d_nnz) and off-diagonal (o_nz/o_nnz) local blocks.
   Any existing column map, ghost vector, and scatter are discarded since the
   nonzero pattern is being redefined. */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  b = (Mat_MPIAIJ*)B->data;

#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(b->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(b->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
  ierr = MatDestroy(&b->B);CHKERRQ(ierr);
  ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
  /* uniprocessor case needs no off-diagonal block: give it zero columns */
  ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);

  if (!B->preallocated) {
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
  }

  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Reset the matrix to the freshly-preallocated state, keeping the existing
   preallocation of both local blocks but dropping assembled data. */
PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  b = (Mat_MPIAIJ*)B->data;

#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(b->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(b->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);

  ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
  ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Duplicate a MATMPIAIJ, copying layout, column map, ghost vector/scatter, and
   (depending on cpvalues) the numerical values of both local blocks. */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode
ierr;

  PetscFunctionBegin;
  *newmat = NULL;
  ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  a = (Mat_MPIAIJ*)mat->data;

  mat->factortype   = matin->factortype;
  mat->assembled    = matin->assembled;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-object MatGetRow() work buffers are not copied; they are rebuilt on demand */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  /* copy the local-to-global column map of the off-diagonal block, if present */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
#endif
  } else a->colmap = NULL;
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) {
    ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  }
  if (oldmat->Mvctx) {
    ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
  }
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

/* Load a matrix from a binary or HDF5 viewer; dispatches on the viewer type. */
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscBool      isbinary, ishdf5;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr);
  if (isbinary) {
    ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
  } else if (ishdf5) {
#if defined(PETSC_HAVE_HDF5)
    ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
#else
    SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
#endif
  } else {
    SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  }
  PetscFunctionReturn(0);
}

/* Load a matrix stored in PETSc's binary format: header, per-row lengths,
   column indices, then values; each rank reads its own row block. */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
  PetscInt       *rowidxs,*colidxs;
  PetscScalar    *matvals;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);

  /* read in matrix header */
  ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
  if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
  M = header[1]; N = header[2]; nz = header[3];
  if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
  if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
  if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);

  /* check if the matrix sizes are correct */
  ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
  if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);

  /* read in row lengths and build row indices (prefix-sum converts lengths to CSR offsets) */
  ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
  ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
  ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
  rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
  /* sanity check: the global sum of row lengths must equal the header's nonzero count */
  ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRMPI(ierr);
  if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
  /* read in column indices and matrix values */
  ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
  ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
  /* store matrix indices and values */
  ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr);
  ierr = PetscFree(rowidxs);CHKERRQ(ierr);
  ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Not scalable because of ISAllGather() unless getting all columns.
*/
PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscBool      isstride;
  PetscMPIInt    lisstride=0,gisstride;

  PetscFunctionBegin;
  /* check if we are grabbing all columns*/
  ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);

  if (isstride) {
    PetscInt start,len,mstart,mlen;
    ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
    ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
    ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
    if (mstart == start && mlen-mstart == len) lisstride = 1;
  }

  /* all ranks must agree before the all-columns shortcut can be taken */
  ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
  if (gisstride) {
    PetscInt N;
    ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
    ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
    ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
  } else {
    PetscInt cbs;
    ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
    ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
  }

  *isseq = iscol_local;
  PetscFunctionReturn(0);
}

/*
 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
 (see MatCreateSubMatrix_MPIAIJ_nonscalable)

 Input Parameters:
   mat - matrix
   isrow - parallel row index set; its local indices are a subset of local columns of mat,
           i.e., mat->rstart <= isrow[i] < mat->rend
   iscol - parallel column index set; its local indices are a subset of local columns of mat,
           i.e., mat->cstart <= iscol[i] < mat->cend
 Output Parameter:
   isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
   iscol_o - sequential column index set for retrieving mat->B
   garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
*/
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
{
  PetscErrorCode ierr;
  Vec            x,cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray,*cmaparray;
  PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            B=a->B;
  Vec            lvec=a->lvec,lcmap;
  PetscInt       i,cstart,cend,Bn=B->cmap->N;
  MPI_Comm       comm;
  VecScatter     Mvctx=a->Mvctx;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
  ierr = VecSet(x,-1.0);CHKERRQ(ierr);
  ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
  ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);

  /* Get start indices (exclusive prefix sum of local sizes of iscol) */
  ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
  isstart -= ncols;
  ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);

  ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
  ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
  ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
  ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
  for (i=0; i<ncols; i++) {
    xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
    idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
  }
  ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
  ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);

  /* Get iscol_d */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
  ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
  ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);

  /* Get isrow_d */
  ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
  rstart = mat->rmap->rstart;
  ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
  for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
  ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);

  ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
  ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
  ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);

  ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices: entries of lvec left at -1 were not selected by iscol */
  count = 0;
  ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
  ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);

  ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
  ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
  for (i=0; i<Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
  ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);

  ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
  /* cannot ensure iscol_o has same blocksize as iscol! */

  ierr = PetscFree(idx);CHKERRQ(ierr);
  *garray = cmap1; /* ownership transfers to the caller (freed with PetscFree) */

  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&cmap);CHKERRQ(ierr);
  ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
  Mat            M = NULL;
  MPI_Comm       comm;
  IS             iscol_d,isrow_d,iscol_o;
  Mat            Asub = NULL,Bsub = NULL;
  PetscInt       n;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
    if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");

    ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
    if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");

    ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
    if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ*)(*submat)->data;
    ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
    ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
    if (n) {
3235 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3236 } 3237 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3238 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3239 3240 } else { /* call == MAT_INITIAL_MATRIX) */ 3241 const PetscInt *garray; 3242 PetscInt BsubN; 3243 3244 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */ 3245 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3246 3247 /* Create local submatrices Asub and Bsub */ 3248 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3249 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3250 3251 /* Create submatrix M */ 3252 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3253 3254 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3255 asub = (Mat_MPIAIJ*)M->data; 3256 3257 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3258 n = asub->B->cmap->N; 3259 if (BsubN > n) { 3260 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3261 const PetscInt *idx; 3262 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3263 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3264 3265 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3266 j = 0; 3267 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3268 for (i=0; i<n; i++) { 3269 if (j >= BsubN) break; 3270 while (subgarray[i] > garray[j]) j++; 3271 3272 if (subgarray[i] == garray[j]) { 3273 idx_new[i] = idx[j++]; 3274 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3275 } 3276 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3277 3278 
ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3279 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3280 3281 } else if (BsubN < n) { 3282 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3283 } 3284 3285 ierr = PetscFree(garray);CHKERRQ(ierr); 3286 *submat = M; 3287 3288 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3289 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3290 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3291 3292 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3293 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3294 3295 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3296 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3297 } 3298 PetscFunctionReturn(0); 3299 } 3300 3301 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3302 { 3303 PetscErrorCode ierr; 3304 IS iscol_local=NULL,isrow_d; 3305 PetscInt csize; 3306 PetscInt n,i,j,start,end; 3307 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3308 MPI_Comm comm; 3309 3310 PetscFunctionBegin; 3311 /* If isrow has same processor distribution as mat, 3312 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3313 if (call == MAT_REUSE_MATRIX) { 3314 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3315 if (isrow_d) { 3316 sameRowDist = PETSC_TRUE; 3317 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3318 } else { 3319 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3320 if (iscol_local) { 3321 sameRowDist = PETSC_TRUE; 3322 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3323 } 3324 } 3325 } else { 3326 /* Check if isrow has same processor distribution as mat */ 3327 
sameDist[0] = PETSC_FALSE; 3328 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3329 if (!n) { 3330 sameDist[0] = PETSC_TRUE; 3331 } else { 3332 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3333 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3334 if (i >= start && j < end) { 3335 sameDist[0] = PETSC_TRUE; 3336 } 3337 } 3338 3339 /* Check if iscol has same processor distribution as mat */ 3340 sameDist[1] = PETSC_FALSE; 3341 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3342 if (!n) { 3343 sameDist[1] = PETSC_TRUE; 3344 } else { 3345 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3346 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3347 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3348 } 3349 3350 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3351 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRMPI(ierr); 3352 sameRowDist = tsameDist[0]; 3353 } 3354 3355 if (sameRowDist) { 3356 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3357 /* isrow and iscol have same processor distribution as mat */ 3358 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3359 PetscFunctionReturn(0); 3360 } else { /* sameRowDist */ 3361 /* isrow has same processor distribution as mat */ 3362 if (call == MAT_INITIAL_MATRIX) { 3363 PetscBool sorted; 3364 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3365 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3366 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3367 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3368 3369 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3370 if (sorted) { 3371 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3372 ierr = 
MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3373 PetscFunctionReturn(0); 3374 } 3375 } else { /* call == MAT_REUSE_MATRIX */ 3376 IS iscol_sub; 3377 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3378 if (iscol_sub) { 3379 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3380 PetscFunctionReturn(0); 3381 } 3382 } 3383 } 3384 } 3385 3386 /* General case: iscol -> iscol_local which has global size of iscol */ 3387 if (call == MAT_REUSE_MATRIX) { 3388 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3389 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3390 } else { 3391 if (!iscol_local) { 3392 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3393 } 3394 } 3395 3396 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3397 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3398 3399 if (call == MAT_INITIAL_MATRIX) { 3400 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3401 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3402 } 3403 PetscFunctionReturn(0); 3404 } 3405 3406 /*@C 3407 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3408 and "off-diagonal" part of the matrix in CSR format. 3409 3410 Collective 3411 3412 Input Parameters: 3413 + comm - MPI communicator 3414 . A - "diagonal" portion of matrix 3415 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3416 - garray - global index of B columns 3417 3418 Output Parameter: 3419 . 
mat - the matrix, with input A as its local diagonal matrix 3420 Level: advanced 3421 3422 Notes: 3423 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3424 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3425 3426 .seealso: MatCreateMPIAIJWithSplitArrays() 3427 @*/ 3428 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3429 { 3430 PetscErrorCode ierr; 3431 Mat_MPIAIJ *maij; 3432 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3433 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3434 const PetscScalar *oa; 3435 Mat Bnew; 3436 PetscInt m,n,N; 3437 3438 PetscFunctionBegin; 3439 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3440 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3441 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3442 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3443 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3444 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3445 3446 /* Get global columns of mat */ 3447 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3448 3449 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3450 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3451 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3452 maij = (Mat_MPIAIJ*)(*mat)->data; 3453 3454 (*mat)->preallocated = PETSC_TRUE; 3455 3456 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3457 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3458 3459 /* Set A as diagonal portion of *mat */ 3460 maij->A = A; 3461 3462 nz = oi[m]; 3463 for (i=0; i<nz; i++) { 3464 col = oj[i]; 3465 
oj[i] = garray[col]; 3466 } 3467 3468 /* Set Bnew as off-diagonal portion of *mat */ 3469 ierr = MatSeqAIJGetArrayRead(B,&oa);CHKERRQ(ierr); 3470 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew);CHKERRQ(ierr); 3471 ierr = MatSeqAIJRestoreArrayRead(B,&oa);CHKERRQ(ierr); 3472 bnew = (Mat_SeqAIJ*)Bnew->data; 3473 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3474 maij->B = Bnew; 3475 3476 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3477 3478 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3479 b->free_a = PETSC_FALSE; 3480 b->free_ij = PETSC_FALSE; 3481 ierr = MatDestroy(&B);CHKERRQ(ierr); 3482 3483 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3484 bnew->free_a = PETSC_TRUE; 3485 bnew->free_ij = PETSC_TRUE; 3486 3487 /* condense columns of maij->B */ 3488 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3489 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3490 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3491 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3492 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3493 PetscFunctionReturn(0); 3494 } 3495 3496 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3497 3498 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3499 { 3500 PetscErrorCode ierr; 3501 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3502 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3503 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3504 Mat M,Msub,B=a->B; 3505 MatScalar *aa; 3506 Mat_SeqAIJ *aij; 3507 PetscInt *garray = a->garray,*colsub,Ncols; 3508 PetscInt 
count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
  IS             iscol_sub,iscmap;
  const PetscInt *is_idx,*cmap;
  PetscBool      allcolumns=PETSC_FALSE;
  MPI_Comm       comm;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    /* Recover the objects composed on *newmat by a previous MAT_INITIAL_MATRIX call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
    if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
    ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);

    ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
    if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");

    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
    if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");

    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
    ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
    if (allcolumns) {
      iscol_sub = iscol_local;
      ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx,*cmap1,k;
      ierr  = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
      ierr  = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
      ierr  = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
      count = 0;
      k     = 0;
      /* keep only columns that exist on this process (diagonal range or in garray);
         idx collects the global column, cmap1 its column index within the submatrix */
      for (i=0; i<Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat; garray is sorted, so scan forward with k */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn-1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);

      ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
      ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
      ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);

      ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
    }

    /* (3) Create sequential Msub */
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
  }

  ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Msub)->data;
  ii   = aij->i;
  ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank,size;
    PetscInt    csize;

    ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
    ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = Ncols/size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix sum of local column counts gives this rank's diagonal column range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);

    /* next, compute all the lengths */
    jj    = aij->j;
    ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
    ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);

    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);

  } else { /* call == MAT_REUSE_MATRIX */
    M    = *newmat;
    ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
    if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);

  jj   = aij->j;
  ierr = MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz  = ii[i+1] - ii[i];
    /* translate Msub's local column indices to submatrix global columns */
    for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
    jj += nz; aa += nz;
  }
  ierr = MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(colsub);CHKERRQ(ierr);

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
    ierr = MatDestroy(&Msub);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
    ierr = ISDestroy(&iscmap);CHKERRQ(ierr);

    if (iscol_local) {
      ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
      ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()

  Note: This requires a sequential iscol with all indices.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            M,Mreuse;
  MatScalar      *aa,*vwork;
  MPI_Comm       comm;
  Mat_SeqAIJ     *aij;
  PetscBool      colflag,allcolumns=PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);

  /* Check for special case: each processor gets entire matrix columns */
  ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);

  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
  } else {
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of
columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix sum of local column counts gives this rank's diagonal column range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;

  /* trigger copy to CPU if needed */
  ierr = MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);
  /* insert the rows of the local sequential submatrix into the parallel result */
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Preallocates B from the local CSR arrays (Ii,J) and inserts the values v; used by MatMPIAIJSetPreallocationCSR() */
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart, cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscErrorCode ierr;
  PetscBool      nooffprocentries;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);

  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr =
PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3860 3861 if (PetscDefined(USE_DEBUG)) { 3862 for (i=0; i<m; i++) { 3863 nnz = Ii[i+1]- Ii[i]; 3864 JJ = J + Ii[i]; 3865 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3866 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3867 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3868 } 3869 } 3870 3871 for (i=0; i<m; i++) { 3872 nnz = Ii[i+1]- Ii[i]; 3873 JJ = J + Ii[i]; 3874 nnz_max = PetscMax(nnz_max,nnz); 3875 d = 0; 3876 for (j=0; j<nnz; j++) { 3877 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3878 } 3879 d_nnz[i] = d; 3880 o_nnz[i] = nnz - d; 3881 } 3882 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3883 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3884 3885 for (i=0; i<m; i++) { 3886 ii = i + rstart; 3887 ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr); 3888 } 3889 nooffprocentries = B->nooffprocentries; 3890 B->nooffprocentries = PETSC_TRUE; 3891 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3892 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3893 B->nooffprocentries = nooffprocentries; 3894 3895 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3896 PetscFunctionReturn(0); 3897 } 3898 3899 /*@ 3900 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3901 (the default parallel PETSc format). 3902 3903 Collective 3904 3905 Input Parameters: 3906 + B - the matrix 3907 . i - the indices into j for the start of each local row (starts with zero) 3908 . 
j - the column indices for each local row (starts with zero) 3909 - v - optional values in the matrix 3910 3911 Level: developer 3912 3913 Notes: 3914 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3915 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3916 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3917 3918 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3919 3920 The format which is used for the sparse matrix input, is equivalent to a 3921 row-major ordering.. i.e for the following matrix, the input data expected is 3922 as shown 3923 3924 $ 1 0 0 3925 $ 2 0 3 P0 3926 $ ------- 3927 $ 4 5 6 P1 3928 $ 3929 $ Process0 [P0]: rows_owned=[0,1] 3930 $ i = {0,1,3} [size = nrow+1 = 2+1] 3931 $ j = {0,0,2} [size = 3] 3932 $ v = {1,2,3} [size = 3] 3933 $ 3934 $ Process1 [P1]: rows_owned=[2] 3935 $ i = {0,3} [size = nrow+1 = 1+1] 3936 $ j = {0,1,2} [size = 3] 3937 $ v = {4,5,6} [size = 3] 3938 3939 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3940 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3941 @*/ 3942 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3943 { 3944 PetscErrorCode ierr; 3945 3946 PetscFunctionBegin; 3947 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3948 PetscFunctionReturn(0); 3949 } 3950 3951 /*@C 3952 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3953 (the default parallel PETSc format). For good matrix assembly performance 3954 the user should preallocate the matrix storage by setting the parameters 3955 d_nz (or d_nnz) and o_nz (or o_nnz). 
By setting these parameters accurately, 3956 performance can be increased by more than a factor of 50. 3957 3958 Collective 3959 3960 Input Parameters: 3961 + B - the matrix 3962 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3963 (same value is used for all local rows) 3964 . d_nnz - array containing the number of nonzeros in the various rows of the 3965 DIAGONAL portion of the local submatrix (possibly different for each row) 3966 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3967 The size of this array is equal to the number of local rows, i.e 'm'. 3968 For matrices that will be factored, you must leave room for (and set) 3969 the diagonal entry even if it is zero. 3970 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3971 submatrix (same value is used for all local rows). 3972 - o_nnz - array containing the number of nonzeros in the various rows of the 3973 OFF-DIAGONAL portion of the local submatrix (possibly different for 3974 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3975 structure. The size of this array is equal to the number 3976 of local rows, i.e 'm'. 3977 3978 If the *_nnz parameter is given then the *_nz parameter is ignored 3979 3980 The AIJ format (also called the Yale sparse matrix format or 3981 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3982 storage. The stored row and column indices begin with zero. 3983 See Users-Manual: ch_mat for details. 3984 3985 The parallel matrix is partitioned such that the first m0 rows belong to 3986 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3987 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is range of indices of the local part of a
   vector suitable for applying the matrix to. This is an mxn matrix. In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.
4074 4075 Level: intermediate 4076 4077 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4078 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 4079 @*/ 4080 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4081 { 4082 PetscErrorCode ierr; 4083 4084 PetscFunctionBegin; 4085 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4086 PetscValidType(B,1); 4087 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4088 PetscFunctionReturn(0); 4089 } 4090 4091 /*@ 4092 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 4093 CSR format for the local rows. 4094 4095 Collective 4096 4097 Input Parameters: 4098 + comm - MPI communicator 4099 . m - number of local rows (Cannot be PETSC_DECIDE) 4100 . n - This value should be the same as the local size used in creating the 4101 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4102 calculated if N is given) For square matrices n is almost always m. 4103 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4104 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4105 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4106 . j - column indices 4107 - a - matrix values 4108 4109 Output Parameter: 4110 . mat - the matrix 4111 4112 Level: intermediate 4113 4114 Notes: 4115 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4116 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4117 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 
   The i and j indices are 0 based, and the i indices are offsets into the local j array.

   The format which is used for the sparse matrix input is equivalent to a
   row-major ordering, i.e. for the following matrix, the input data expected is
   as shown.

   Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays()

$        1 0 0
$        2 0 3   P0
$       -------
$        4 5 6   P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1 = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Validate the CSR input before creating anything: row offsets must start at 0,
     and the local row count cannot be left to PETSc to decide. */
  if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  /* Preallocation from the CSR arrays also copies the numerical values into the matrix */
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain matrix data in standard
   CSR format for the local rows.
Only the numerical values are updated the other arrays must be identical 4163 4164 Collective 4165 4166 Input Parameters: 4167 + mat - the matrix 4168 . m - number of local rows (Cannot be PETSC_DECIDE) 4169 . n - This value should be the same as the local size used in creating the 4170 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4171 calculated if N is given) For square matrices n is almost always m. 4172 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4173 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4174 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4175 . J - column indices 4176 - v - matrix values 4177 4178 Level: intermediate 4179 4180 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4181 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4182 @*/ 4183 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4184 { 4185 PetscErrorCode ierr; 4186 PetscInt cstart,nnz,i,j; 4187 PetscInt *ld; 4188 PetscBool nooffprocentries; 4189 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4190 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data; 4191 PetscScalar *ad = Ad->a, *ao = Ao->a; 4192 const PetscInt *Adi = Ad->i; 4193 PetscInt ldi,Iii,md; 4194 4195 PetscFunctionBegin; 4196 if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4197 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4198 if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4199 if (n != mat->cmap->n) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4200 4201 cstart = mat->cmap->rstart; 4202 if (!Aij->ld) { 4203 /* count number of entries below block diagonal */ 4204 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 4205 Aij->ld = ld; 4206 for (i=0; i<m; i++) { 4207 nnz = Ii[i+1]- Ii[i]; 4208 j = 0; 4209 while (J[j] < cstart && j < nnz) {j++;} 4210 J += nnz; 4211 ld[i] = j; 4212 } 4213 } else { 4214 ld = Aij->ld; 4215 } 4216 4217 for (i=0; i<m; i++) { 4218 nnz = Ii[i+1]- Ii[i]; 4219 Iii = Ii[i]; 4220 ldi = ld[i]; 4221 md = Adi[i+1]-Adi[i]; 4222 ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr); 4223 ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr); 4224 ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr); 4225 ad += md; 4226 ao += nnz - md; 4227 } 4228 nooffprocentries = mat->nooffprocentries; 4229 mat->nooffprocentries = PETSC_TRUE; 4230 ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr); 4231 ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr); 4232 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 4233 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4234 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4235 mat->nooffprocentries = nooffprocentries; 4236 PetscFunctionReturn(0); 4237 } 4238 4239 /*@C 4240 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4241 (the default parallel PETSc format). For good matrix assembly performance 4242 the user should preallocate the matrix storage by setting the parameters 4243 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4244 performance can be increased by more than a factor of 50. 4245 4246 Collective 4247 4248 Input Parameters: 4249 + comm - MPI communicator 4250 . 
   m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2 etc.. where
   m0,m1,m2,.. are the input parameter 'm'. i.e. each processor stores
   values corresponding to [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to 0th partition, the next n1 columns belonging to the next
   partition etc.. where n0,n1,n2... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor. i.e. diagonal matrix on
   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism
.vb
   MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
.ve

$     MatCreate(...,&A);
$     MatSetType(A,MATMPIAIJ);
$     MatSetSizes(A, m,n,M,N);
$     MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
-  -mat_inode_limit <limit> - Sets inode limit (max limit=5)

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the pre-allocation is perfect.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  /* On a single-process communicator a SEQAIJ matrix is created (see Notes above) */
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@C
     MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix

   Not collective

   Input Parameter:
.   A - The MPIAIJ matrix

   Output Parameters:
+   Ad - The local diagonal block as a SeqAIJ matrix
.   Ao - The local off-diagonal block as a SeqAIJ matrix
-   colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix

   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
   in Ad are in [0, Nc) where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
   local column numbers to global column numbers in the original matrix.
4448 4449 Level: intermediate 4450 4451 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ 4452 @*/ 4453 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4454 { 4455 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4456 PetscBool flg; 4457 PetscErrorCode ierr; 4458 4459 PetscFunctionBegin; 4460 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4461 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4462 if (Ad) *Ad = a->A; 4463 if (Ao) *Ao = a->B; 4464 if (colmap) *colmap = a->garray; 4465 PetscFunctionReturn(0); 4466 } 4467 4468 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4469 { 4470 PetscErrorCode ierr; 4471 PetscInt m,N,i,rstart,nnz,Ii; 4472 PetscInt *indx; 4473 PetscScalar *values; 4474 4475 PetscFunctionBegin; 4476 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4477 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4478 PetscInt *dnz,*onz,sum,bs,cbs; 4479 4480 if (n == PETSC_DECIDE) { 4481 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4482 } 4483 /* Check sum(n) = N */ 4484 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 4485 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4486 4487 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 4488 rstart -= m; 4489 4490 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4491 for (i=0; i<m; i++) { 4492 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4493 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4494 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4495 } 4496 4497 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4498 ierr = 
MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4499 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4500 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4501 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4502 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4503 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4504 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4505 ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 4506 } 4507 4508 /* numeric phase */ 4509 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4510 for (i=0; i<m; i++) { 4511 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4512 Ii = i + rstart; 4513 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4514 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4515 } 4516 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4517 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4518 PetscFunctionReturn(0); 4519 } 4520 4521 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4522 { 4523 PetscErrorCode ierr; 4524 PetscMPIInt rank; 4525 PetscInt m,N,i,rstart,nnz; 4526 size_t len; 4527 const PetscInt *indx; 4528 PetscViewer out; 4529 char *name; 4530 Mat B; 4531 const PetscScalar *values; 4532 4533 PetscFunctionBegin; 4534 ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr); 4535 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 4536 /* Should this be the type of the diagonal block of A? 
*/ 4537 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4538 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4539 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4540 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4541 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4542 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 4543 for (i=0; i<m; i++) { 4544 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4545 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4546 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4547 } 4548 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4549 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4550 4551 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr); 4552 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4553 ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr); 4554 ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr); 4555 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4556 ierr = PetscFree(name);CHKERRQ(ierr); 4557 ierr = MatView(B,out);CHKERRQ(ierr); 4558 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4559 ierr = MatDestroy(&B);CHKERRQ(ierr); 4560 PetscFunctionReturn(0); 4561 } 4562 4563 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4564 { 4565 PetscErrorCode ierr; 4566 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4567 4568 PetscFunctionBegin; 4569 if (!merge) PetscFunctionReturn(0); 4570 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4571 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4572 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4573 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4574 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4575 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4576 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4577 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4578 ierr = 
PetscFree(merge->buf_rj);CHKERRQ(ierr); 4579 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4580 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4581 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4582 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4583 ierr = PetscFree(merge);CHKERRQ(ierr); 4584 PetscFunctionReturn(0); 4585 } 4586 4587 #include <../src/mat/utils/freespace.h> 4588 #include <petscbt.h> 4589 4590 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4591 { 4592 PetscErrorCode ierr; 4593 MPI_Comm comm; 4594 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4595 PetscMPIInt size,rank,taga,*len_s; 4596 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4597 PetscInt proc,m; 4598 PetscInt **buf_ri,**buf_rj; 4599 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4600 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4601 MPI_Request *s_waits,*r_waits; 4602 MPI_Status *status; 4603 MatScalar *aa=a->a; 4604 MatScalar **abuf_r,*ba_i; 4605 Mat_Merge_SeqsToMPI *merge; 4606 PetscContainer container; 4607 4608 PetscFunctionBegin; 4609 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4610 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4611 4612 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4613 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 4614 4615 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4616 if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4617 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4618 4619 bi = merge->bi; 4620 bj = merge->bj; 4621 buf_ri = merge->buf_ri; 4622 buf_rj = merge->buf_rj; 4623 4624 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4625 owners = merge->rowmap->range; 4626 len_s = merge->len_s; 4627 4628 /* send and recv matrix values */ 4629 /*-----------------------------*/ 4630 ierr = 
PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4631 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4632 4633 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4634 for (proc=0,k=0; proc<size; proc++) { 4635 if (!len_s[proc]) continue; 4636 i = owners[proc]; 4637 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRMPI(ierr); 4638 k++; 4639 } 4640 4641 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRMPI(ierr);} 4642 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRMPI(ierr);} 4643 ierr = PetscFree(status);CHKERRQ(ierr); 4644 4645 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4646 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4647 4648 /* insert mat values of mpimat */ 4649 /*----------------------------*/ 4650 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4651 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4652 4653 for (k=0; k<merge->nrecv; k++) { 4654 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4655 nrows = *(buf_ri_k[k]); 4656 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4657 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4658 } 4659 4660 /* set values of ba */ 4661 m = merge->rowmap->n; 4662 for (i=0; i<m; i++) { 4663 arow = owners[rank] + i; 4664 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4665 bnzi = bi[i+1] - bi[i]; 4666 ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr); 4667 4668 /* add local non-zero vals of this proc's seqmat into ba */ 4669 anzi = ai[arow+1] - ai[arow]; 4670 aj = a->j + ai[arow]; 4671 aa = a->a + ai[arow]; 4672 nextaj = 0; 4673 for (j=0; nextaj<anzi; j++) { 4674 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4675 ba_i[j] += aa[nextaj++]; 4676 } 4677 } 4678 4679 /* add received vals into ba */ 4680 for 
(k=0; k<merge->nrecv; k++) { /* k-th received message */ 4681 /* i-th row */ 4682 if (i == *nextrow[k]) { 4683 anzi = *(nextai[k]+1) - *nextai[k]; 4684 aj = buf_rj[k] + *(nextai[k]); 4685 aa = abuf_r[k] + *(nextai[k]); 4686 nextaj = 0; 4687 for (j=0; nextaj<anzi; j++) { 4688 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4689 ba_i[j] += aa[nextaj++]; 4690 } 4691 } 4692 nextrow[k]++; nextai[k]++; 4693 } 4694 } 4695 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4696 } 4697 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4698 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4699 4700 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4701 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4702 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4703 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4704 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4705 PetscFunctionReturn(0); 4706 } 4707 4708 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4709 { 4710 PetscErrorCode ierr; 4711 Mat B_mpi; 4712 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4713 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4714 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4715 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4716 PetscInt len,proc,*dnz,*onz,bs,cbs; 4717 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4718 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4719 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4720 MPI_Status *status; 4721 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4722 PetscBT lnkbt; 4723 Mat_Merge_SeqsToMPI *merge; 4724 PetscContainer container; 4725 4726 PetscFunctionBegin; 4727 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4728 4729 /* make sure it is a PETSc comm */ 4730 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4731 ierr = 
MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4732 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 4733 4734 ierr = PetscNew(&merge);CHKERRQ(ierr); 4735 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4736 4737 /* determine row ownership */ 4738 /*---------------------------------------------------------*/ 4739 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4740 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4741 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4742 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4743 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4744 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4745 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4746 4747 m = merge->rowmap->n; 4748 owners = merge->rowmap->range; 4749 4750 /* determine the number of messages to send, their lengths */ 4751 /*---------------------------------------------------------*/ 4752 len_s = merge->len_s; 4753 4754 len = 0; /* length of buf_si[] */ 4755 merge->nsend = 0; 4756 for (proc=0; proc<size; proc++) { 4757 len_si[proc] = 0; 4758 if (proc == rank) { 4759 len_s[proc] = 0; 4760 } else { 4761 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4762 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4763 } 4764 if (len_s[proc]) { 4765 merge->nsend++; 4766 nrows = 0; 4767 for (i=owners[proc]; i<owners[proc+1]; i++) { 4768 if (ai[i+1] > ai[i]) nrows++; 4769 } 4770 len_si[proc] = 2*(nrows+1); 4771 len += len_si[proc]; 4772 } 4773 } 4774 4775 /* determine the number and length of messages to receive for ij-structure */ 4776 /*-------------------------------------------------------------------------*/ 4777 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4778 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4779 4780 /* post the Irecv of j-structure */ 4781 
/*-------------------------------*/ 4782 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4783 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4784 4785 /* post the Isend of j-structure */ 4786 /*--------------------------------*/ 4787 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4788 4789 for (proc=0, k=0; proc<size; proc++) { 4790 if (!len_s[proc]) continue; 4791 i = owners[proc]; 4792 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRMPI(ierr); 4793 k++; 4794 } 4795 4796 /* receives and sends of j-structure are complete */ 4797 /*------------------------------------------------*/ 4798 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRMPI(ierr);} 4799 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRMPI(ierr);} 4800 4801 /* send and recv i-structure */ 4802 /*---------------------------*/ 4803 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4804 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4805 4806 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4807 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4808 for (proc=0,k=0; proc<size; proc++) { 4809 if (!len_s[proc]) continue; 4810 /* form outgoing message for i-structure: 4811 buf_si[0]: nrows to be sent 4812 [1:nrows]: row index (global) 4813 [nrows+1:2*nrows+1]: i-structure index 4814 */ 4815 /*-------------------------------------------*/ 4816 nrows = len_si[proc]/2 - 1; 4817 buf_si_i = buf_si + nrows+1; 4818 buf_si[0] = nrows; 4819 buf_si_i[0] = 0; 4820 nrows = 0; 4821 for (i=owners[proc]; i<owners[proc+1]; i++) { 4822 anzi = ai[i+1] - ai[i]; 4823 if (anzi) { 4824 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4825 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4826 nrows++; 4827 } 4828 } 4829 ierr = 
MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRMPI(ierr); 4830 k++; 4831 buf_si += len_si[proc]; 4832 } 4833 4834 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRMPI(ierr);} 4835 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRMPI(ierr);} 4836 4837 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4838 for (i=0; i<merge->nrecv; i++) { 4839 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4840 } 4841 4842 ierr = PetscFree(len_si);CHKERRQ(ierr); 4843 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4844 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4845 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4846 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4847 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4848 ierr = PetscFree(status);CHKERRQ(ierr); 4849 4850 /* compute a local seq matrix in each processor */ 4851 /*----------------------------------------------*/ 4852 /* allocate bi array and free space for accumulating nonzero column info */ 4853 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4854 bi[0] = 0; 4855 4856 /* create and initialize a linked list */ 4857 nlnk = N+1; 4858 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4859 4860 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4861 len = ai[owners[rank+1]] - ai[owners[rank]]; 4862 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4863 4864 current_space = free_space; 4865 4866 /* determine symbolic info for each local row */ 4867 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4868 4869 for (k=0; k<merge->nrecv; k++) { 4870 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4871 nrows = *buf_ri_k[k]; 4872 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4873 nextai[k] = buf_ri_k[k] + (nrows + 
1); /* points to the next i-structure of k-th recved i-structure */ 4874 } 4875 4876 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4877 len = 0; 4878 for (i=0; i<m; i++) { 4879 bnzi = 0; 4880 /* add local non-zero cols of this proc's seqmat into lnk */ 4881 arow = owners[rank] + i; 4882 anzi = ai[arow+1] - ai[arow]; 4883 aj = a->j + ai[arow]; 4884 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4885 bnzi += nlnk; 4886 /* add received col data into lnk */ 4887 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4888 if (i == *nextrow[k]) { /* i-th row */ 4889 anzi = *(nextai[k]+1) - *nextai[k]; 4890 aj = buf_rj[k] + *nextai[k]; 4891 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4892 bnzi += nlnk; 4893 nextrow[k]++; nextai[k]++; 4894 } 4895 } 4896 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4897 4898 /* if free space is not available, make more free space */ 4899 if (current_space->local_remaining<bnzi) { 4900 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4901 nspacedouble++; 4902 } 4903 /* copy data into free space, then initialize lnk */ 4904 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4905 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4906 4907 current_space->array += bnzi; 4908 current_space->local_used += bnzi; 4909 current_space->local_remaining -= bnzi; 4910 4911 bi[i+1] = bi[i] + bnzi; 4912 } 4913 4914 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4915 4916 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4917 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4918 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4919 4920 /* create symbolic parallel matrix B_mpi */ 4921 /*---------------------------------------*/ 4922 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4923 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4924 if 
(n==PETSC_DECIDE) { 4925 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4926 } else { 4927 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4928 } 4929 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4930 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4931 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4932 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4933 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4934 4935 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4936 B_mpi->assembled = PETSC_FALSE; 4937 merge->bi = bi; 4938 merge->bj = bj; 4939 merge->buf_ri = buf_ri; 4940 merge->buf_rj = buf_rj; 4941 merge->coi = NULL; 4942 merge->coj = NULL; 4943 merge->owners_co = NULL; 4944 4945 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4946 4947 /* attach the supporting struct to B_mpi for reuse */ 4948 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4949 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4950 ierr = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr); 4951 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4952 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4953 *mpimat = B_mpi; 4954 4955 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4956 PetscFunctionReturn(0); 4957 } 4958 4959 /*@C 4960 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4961 matrices from each processor 4962 4963 Collective 4964 4965 Input Parameters: 4966 + comm - the communicators the parallel matrix will live on 4967 . seqmat - the input sequential matrices 4968 . m - number of local rows (or PETSC_DECIDE) 4969 . n - number of local columns (or PETSC_DECIDE) 4970 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4971 4972 Output Parameter: 4973 . 
mpimat - the parallel matrix generated 4974 4975 Level: advanced 4976 4977 Notes: 4978 The dimensions of the sequential matrix in each processor MUST be the same. 4979 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4980 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4981 @*/ 4982 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4983 { 4984 PetscErrorCode ierr; 4985 PetscMPIInt size; 4986 4987 PetscFunctionBegin; 4988 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4989 if (size == 1) { 4990 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4991 if (scall == MAT_INITIAL_MATRIX) { 4992 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4993 } else { 4994 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4995 } 4996 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4997 PetscFunctionReturn(0); 4998 } 4999 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5000 if (scall == MAT_INITIAL_MATRIX) { 5001 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 5002 } 5003 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 5004 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5005 PetscFunctionReturn(0); 5006 } 5007 5008 /*@ 5009 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5010 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5011 with MatGetSize() 5012 5013 Not Collective 5014 5015 Input Parameters: 5016 + A - the matrix 5017 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5018 5019 Output Parameter: 5020 . 
A_loc - the local sequential matrix generated 5021 5022 Level: developer 5023 5024 Notes: 5025 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 5026 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 5027 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 5028 modify the values of the returned A_loc. 5029 5030 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge() 5031 @*/ 5032 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5033 { 5034 PetscErrorCode ierr; 5035 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5036 Mat_SeqAIJ *mat,*a,*b; 5037 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5038 const PetscScalar *aa,*ba,*aav,*bav; 5039 PetscScalar *ca,*cam; 5040 PetscMPIInt size; 5041 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5042 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5043 PetscBool match; 5044 5045 PetscFunctionBegin; 5046 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 5047 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5048 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 5049 if (size == 1) { 5050 if (scall == MAT_INITIAL_MATRIX) { 5051 ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr); 5052 *A_loc = mpimat->A; 5053 } else if (scall == MAT_REUSE_MATRIX) { 5054 ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5055 } 5056 PetscFunctionReturn(0); 5057 } 5058 5059 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5060 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5061 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5062 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5063 ierr = MatSeqAIJGetArrayRead(mpimat->A,&aav);CHKERRQ(ierr); 5064 
ierr = MatSeqAIJGetArrayRead(mpimat->B,&bav);CHKERRQ(ierr); 5065 aa = aav; 5066 ba = bav; 5067 if (scall == MAT_INITIAL_MATRIX) { 5068 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5069 ci[0] = 0; 5070 for (i=0; i<am; i++) { 5071 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5072 } 5073 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5074 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5075 k = 0; 5076 for (i=0; i<am; i++) { 5077 ncols_o = bi[i+1] - bi[i]; 5078 ncols_d = ai[i+1] - ai[i]; 5079 /* off-diagonal portion of A */ 5080 for (jo=0; jo<ncols_o; jo++) { 5081 col = cmap[*bj]; 5082 if (col >= cstart) break; 5083 cj[k] = col; bj++; 5084 ca[k++] = *ba++; 5085 } 5086 /* diagonal portion of A */ 5087 for (j=0; j<ncols_d; j++) { 5088 cj[k] = cstart + *aj++; 5089 ca[k++] = *aa++; 5090 } 5091 /* off-diagonal portion of A */ 5092 for (j=jo; j<ncols_o; j++) { 5093 cj[k] = cmap[*bj++]; 5094 ca[k++] = *ba++; 5095 } 5096 } 5097 /* put together the new matrix */ 5098 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5099 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5100 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5101 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5102 mat->free_a = PETSC_TRUE; 5103 mat->free_ij = PETSC_TRUE; 5104 mat->nonew = 0; 5105 } else if (scall == MAT_REUSE_MATRIX) { 5106 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5107 #if defined(PETSC_USE_DEVICE) 5108 (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU; 5109 #endif 5110 ci = mat->i; cj = mat->j; cam = mat->a; 5111 for (i=0; i<am; i++) { 5112 /* off-diagonal portion of A */ 5113 ncols_o = bi[i+1] - bi[i]; 5114 for (jo=0; jo<ncols_o; jo++) { 5115 col = cmap[*bj]; 5116 if (col >= cstart) break; 5117 *cam++ = *ba++; bj++; 5118 } 5119 /* diagonal portion of A */ 5120 ncols_d = ai[i+1] - ai[i]; 5121 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5122 /* off-diagonal portion of A */ 5123 for (j=jo; j<ncols_o; j++) { 5124 *cam++ = *ba++; bj++; 5125 } 5126 } 5127 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5128 ierr = MatSeqAIJRestoreArrayRead(mpimat->A,&aav);CHKERRQ(ierr); 5129 ierr = MatSeqAIJRestoreArrayRead(mpimat->B,&bav);CHKERRQ(ierr); 5130 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5131 PetscFunctionReturn(0); 5132 } 5133 5134 /*@ 5135 MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5136 mlocal rows and n columns. 
Where n is the sum of the number of columns of the diagonal and offdiagonal part 5137 5138 Not Collective 5139 5140 Input Parameters: 5141 + A - the matrix 5142 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5143 5144 Output Parameters: 5145 + glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL) 5146 - A_loc - the local sequential matrix generated 5147 5148 Level: developer 5149 5150 Notes: 5151 This is different from MatMPIAIJGetLocalMat() since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering) 5152 5153 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed() 5154 5155 @*/ 5156 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc) 5157 { 5158 PetscErrorCode ierr; 5159 Mat Ao,Ad; 5160 const PetscInt *cmap; 5161 PetscMPIInt size; 5162 PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*); 5163 5164 PetscFunctionBegin; 5165 ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap);CHKERRQ(ierr); 5166 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 5167 if (size == 1) { 5168 if (scall == MAT_INITIAL_MATRIX) { 5169 ierr = PetscObjectReference((PetscObject)Ad);CHKERRQ(ierr); 5170 *A_loc = Ad; 5171 } else if (scall == MAT_REUSE_MATRIX) { 5172 ierr = MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5173 } 5174 if (glob) { ierr = ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob);CHKERRQ(ierr); } 5175 PetscFunctionReturn(0); 5176 } 5177 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f);CHKERRQ(ierr); 5178 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5179 if (f) { 5180 ierr = (*f)(A,scall,glob,A_loc);CHKERRQ(ierr); 5181 } else { 5182 Mat_SeqAIJ *a = (Mat_SeqAIJ*)Ad->data; 5183 Mat_SeqAIJ *b = (Mat_SeqAIJ*)Ao->data; 5184 Mat_SeqAIJ 
*c; 5185 PetscInt *ai = a->i, *aj = a->j; 5186 PetscInt *bi = b->i, *bj = b->j; 5187 PetscInt *ci,*cj; 5188 const PetscScalar *aa,*ba; 5189 PetscScalar *ca; 5190 PetscInt i,j,am,dn,on; 5191 5192 ierr = MatGetLocalSize(Ad,&am,&dn);CHKERRQ(ierr); 5193 ierr = MatGetLocalSize(Ao,NULL,&on);CHKERRQ(ierr); 5194 ierr = MatSeqAIJGetArrayRead(Ad,&aa);CHKERRQ(ierr); 5195 ierr = MatSeqAIJGetArrayRead(Ao,&ba);CHKERRQ(ierr); 5196 if (scall == MAT_INITIAL_MATRIX) { 5197 PetscInt k; 5198 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5199 ierr = PetscMalloc1(ai[am]+bi[am],&cj);CHKERRQ(ierr); 5200 ierr = PetscMalloc1(ai[am]+bi[am],&ca);CHKERRQ(ierr); 5201 ci[0] = 0; 5202 for (i=0,k=0; i<am; i++) { 5203 const PetscInt ncols_o = bi[i+1] - bi[i]; 5204 const PetscInt ncols_d = ai[i+1] - ai[i]; 5205 ci[i+1] = ci[i] + ncols_o + ncols_d; 5206 /* diagonal portion of A */ 5207 for (j=0; j<ncols_d; j++,k++) { 5208 cj[k] = *aj++; 5209 ca[k] = *aa++; 5210 } 5211 /* off-diagonal portion of A */ 5212 for (j=0; j<ncols_o; j++,k++) { 5213 cj[k] = dn + *bj++; 5214 ca[k] = *ba++; 5215 } 5216 } 5217 /* put together the new matrix */ 5218 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc);CHKERRQ(ierr); 5219 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5220 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5221 c = (Mat_SeqAIJ*)(*A_loc)->data; 5222 c->free_a = PETSC_TRUE; 5223 c->free_ij = PETSC_TRUE; 5224 c->nonew = 0; 5225 ierr = MatSetType(*A_loc,((PetscObject)Ad)->type_name);CHKERRQ(ierr); 5226 } else if (scall == MAT_REUSE_MATRIX) { 5227 #if defined(PETSC_HAVE_DEVICE) 5228 (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU; 5229 #endif 5230 c = (Mat_SeqAIJ*)(*A_loc)->data; 5231 ca = c->a; 5232 for (i=0; i<am; i++) { 5233 const PetscInt ncols_d = ai[i+1] - ai[i]; 5234 const PetscInt ncols_o = bi[i+1] - bi[i]; 5235 /* diagonal portion of A */ 5236 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 5237 /* off-diagonal portion of A */ 5238 for (j=0; j<ncols_o; j++) *ca++ = *ba++; 5239 } 5240 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5241 ierr = MatSeqAIJRestoreArrayRead(Ad,&aa);CHKERRQ(ierr); 5242 ierr = MatSeqAIJRestoreArrayRead(Ao,&aa);CHKERRQ(ierr); 5243 if (glob) { 5244 PetscInt cst, *gidx; 5245 5246 ierr = MatGetOwnershipRangeColumn(A,&cst,NULL);CHKERRQ(ierr); 5247 ierr = PetscMalloc1(dn+on,&gidx);CHKERRQ(ierr); 5248 for (i=0; i<dn; i++) gidx[i] = cst + i; 5249 for (i=0; i<on; i++) gidx[i+dn] = cmap[i]; 5250 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob);CHKERRQ(ierr); 5251 } 5252 } 5253 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5254 PetscFunctionReturn(0); 5255 } 5256 5257 /*@C 5258 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5259 5260 Not Collective 5261 5262 Input Parameters: 5263 + A - the matrix 5264 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5265 - row, col - index sets of rows and columns to extract (or NULL) 5266 5267 Output Parameter: 5268 . 
A_loc - the local sequential matrix generated 5269 5270 Level: developer 5271 5272 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5273 5274 @*/ 5275 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5276 { 5277 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5278 PetscErrorCode ierr; 5279 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5280 IS isrowa,iscola; 5281 Mat *aloc; 5282 PetscBool match; 5283 5284 PetscFunctionBegin; 5285 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5286 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5287 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5288 if (!row) { 5289 start = A->rmap->rstart; end = A->rmap->rend; 5290 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5291 } else { 5292 isrowa = *row; 5293 } 5294 if (!col) { 5295 start = A->cmap->rstart; 5296 cmap = a->garray; 5297 nzA = a->A->cmap->n; 5298 nzB = a->B->cmap->n; 5299 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5300 ncols = 0; 5301 for (i=0; i<nzB; i++) { 5302 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5303 else break; 5304 } 5305 imark = i; 5306 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5307 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5308 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5309 } else { 5310 iscola = *col; 5311 } 5312 if (scall != MAT_INITIAL_MATRIX) { 5313 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5314 aloc[0] = *A_loc; 5315 } 5316 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5317 if (!col) { /* attach global id of condensed columns */ 5318 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5319 } 5320 *A_loc = aloc[0]; 5321 ierr = PetscFree(aloc);CHKERRQ(ierr); 5322 if (!row) { 5323 ierr = 
ISDestroy(&isrowa);CHKERRQ(ierr); 5324 } 5325 if (!col) { 5326 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5327 } 5328 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5329 PetscFunctionReturn(0); 5330 } 5331 5332 /* 5333 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5334 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5335 * on a global size. 5336 * */ 5337 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5338 { 5339 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5340 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5341 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5342 PetscMPIInt owner; 5343 PetscSFNode *iremote,*oiremote; 5344 const PetscInt *lrowindices; 5345 PetscErrorCode ierr; 5346 PetscSF sf,osf; 5347 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5348 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5349 MPI_Comm comm; 5350 ISLocalToGlobalMapping mapping; 5351 5352 PetscFunctionBegin; 5353 ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr); 5354 /* plocalsize is the number of roots 5355 * nrows is the number of leaves 5356 * */ 5357 ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr); 5358 ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr); 5359 ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr); 5360 ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr); 5361 for (i=0;i<nrows;i++) { 5362 /* Find a remote index and an owner for a row 5363 * The row could be local or remote 5364 * */ 5365 owner = 0; 5366 lidx = 0; 5367 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr); 5368 iremote[i].index = lidx; 5369 iremote[i].rank = owner; 5370 } 5371 /* Create SF to communicate how many nonzero columns for each row */ 5372 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5373 /* SF will figure out the number of 
nonzero colunms for each row, and their 5374 * offsets 5375 * */ 5376 ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5377 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5378 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5379 5380 ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr); 5381 ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr); 5382 ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr); 5383 roffsets[0] = 0; 5384 roffsets[1] = 0; 5385 for (i=0;i<plocalsize;i++) { 5386 /* diag */ 5387 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5388 /* off diag */ 5389 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5390 /* compute offsets so that we relative location for each row */ 5391 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5392 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5393 } 5394 ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr); 5395 ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr); 5396 /* 'r' means root, and 'l' means leaf */ 5397 ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr); 5398 ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr); 5399 ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr); 5400 ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr); 5401 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5402 ierr = PetscFree(roffsets);CHKERRQ(ierr); 5403 ierr = PetscFree(nrcols);CHKERRQ(ierr); 5404 dntotalcols = 0; 5405 ontotalcols = 0; 5406 ncol = 0; 5407 for (i=0;i<nrows;i++) { 5408 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5409 ncol = PetscMax(pnnz[i],ncol); 5410 /* diag */ 5411 dntotalcols += nlcols[i*2+0]; 5412 /* off diag */ 5413 ontotalcols += nlcols[i*2+1]; 5414 } 5415 /* We do not need to figure the right number of columns 5416 * since all the calculations will be done by going through the raw data 5417 * */ 5418 ierr = 
MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr); 5419 ierr = MatSetUp(*P_oth);CHKERRQ(ierr); 5420 ierr = PetscFree(pnnz);CHKERRQ(ierr); 5421 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5422 /* diag */ 5423 ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr); 5424 /* off diag */ 5425 ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr); 5426 /* diag */ 5427 ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr); 5428 /* off diag */ 5429 ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr); 5430 dntotalcols = 0; 5431 ontotalcols = 0; 5432 ntotalcols = 0; 5433 for (i=0;i<nrows;i++) { 5434 owner = 0; 5435 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr); 5436 /* Set iremote for diag matrix */ 5437 for (j=0;j<nlcols[i*2+0];j++) { 5438 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5439 iremote[dntotalcols].rank = owner; 5440 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5441 ilocal[dntotalcols++] = ntotalcols++; 5442 } 5443 /* off diag */ 5444 for (j=0;j<nlcols[i*2+1];j++) { 5445 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5446 oiremote[ontotalcols].rank = owner; 5447 oilocal[ontotalcols++] = ntotalcols++; 5448 } 5449 } 5450 ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr); 5451 ierr = PetscFree(loffsets);CHKERRQ(ierr); 5452 ierr = PetscFree(nlcols);CHKERRQ(ierr); 5453 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5454 /* P serves as roots and P_oth is leaves 5455 * Diag matrix 5456 * */ 5457 ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5458 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5459 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5460 5461 ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr); 5462 /* Off diag */ 5463 ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5464 ierr = 
PetscSFSetFromOptions(osf);CHKERRQ(ierr); 5465 ierr = PetscSFSetUp(osf);CHKERRQ(ierr); 5466 /* We operate on the matrix internal data for saving memory */ 5467 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5468 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5469 ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr); 5470 /* Convert to global indices for diag matrix */ 5471 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5472 ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5473 /* We want P_oth store global indices */ 5474 ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr); 5475 /* Use memory scalable approach */ 5476 ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr); 5477 ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr); 5478 ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5479 ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5480 /* Convert back to local indices */ 5481 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5482 ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5483 nout = 0; 5484 ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr); 5485 if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout); 5486 ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr); 5487 /* Exchange values */ 5488 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5489 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5490 /* Stop PETSc from shrinking memory */ 5491 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5492 ierr = 
MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5493 ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5494 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5495 ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr); 5496 ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr); 5497 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5498 ierr = PetscSFDestroy(&osf);CHKERRQ(ierr); 5499 PetscFunctionReturn(0); 5500 } 5501 5502 /* 5503 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5504 * This supports MPIAIJ and MAIJ 5505 * */ 5506 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5507 { 5508 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5509 Mat_SeqAIJ *p_oth; 5510 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data; 5511 IS rows,map; 5512 PetscHMapI hamp; 5513 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5514 MPI_Comm comm; 5515 PetscSF sf,osf; 5516 PetscBool has; 5517 PetscErrorCode ierr; 5518 5519 PetscFunctionBegin; 5520 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5521 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5522 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5523 * and then create a submatrix (that often is an overlapping matrix) 5524 * */ 5525 if (reuse == MAT_INITIAL_MATRIX) { 5526 /* Use a hash table to figure out unique keys */ 5527 ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr); 5528 ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr); 5529 ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr); 5530 count = 0; 5531 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5532 for (i=0;i<a->B->cmap->n;i++) { 5533 key = a->garray[i]/dof; 5534 ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr); 5535 if (!has) { 5536 
mapping[i] = count; 5537 ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr); 5538 } else { 5539 /* Current 'i' has the same value the previous step */ 5540 mapping[i] = count-1; 5541 } 5542 } 5543 ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr); 5544 ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr); 5545 if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count); 5546 ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr); 5547 off = 0; 5548 ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr); 5549 ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr); 5550 ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr); 5551 ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr); 5552 /* In case, the matrix was already created but users want to recreate the matrix */ 5553 ierr = MatDestroy(P_oth);CHKERRQ(ierr); 5554 ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr); 5555 ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr); 5556 ierr = ISDestroy(&map);CHKERRQ(ierr); 5557 ierr = ISDestroy(&rows);CHKERRQ(ierr); 5558 } else if (reuse == MAT_REUSE_MATRIX) { 5559 /* If matrix was already created, we simply update values using SF objects 5560 * that as attached to the matrix ealier. 
5561 * */ 5562 ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5563 ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5564 if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet"); 5565 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5566 /* Update values in place */ 5567 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5568 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5569 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5570 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5571 } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type"); 5572 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5573 PetscFunctionReturn(0); 5574 } 5575 5576 /*@C 5577 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5578 5579 Collective on Mat 5580 5581 Input Parameters: 5582 + A - the first matrix in mpiaij format 5583 . B - the second matrix in mpiaij format 5584 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5585 5586 Input/Output Parameters: 5587 + rowb - index sets of rows of B to extract (or NULL), modified on output 5588 - colb - index sets of columns of B to extract (or NULL), modified on output 5589 5590 Output Parameter: 5591 . 
   B_seq - the sequential matrix generated

 Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq=NULL;

  PetscFunctionBegin;
  /* A's column layout must match B's row layout so that "columns of A" index "rows of B" */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    /* Build the global row index set of B as the nonzero columns of local A:
       off-diagonal columns below the diagonal block, then the diagonal block's
       own columns, then the remaining off-diagonal columns.  garray[] is sorted,
       so the merged list stays sorted. */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); /* all columns of B */
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb  = *rowb; iscolb = *colb;
    ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq; /* MatCreateSubMatrices() reuses the matrix passed in bseq[0] */
  }
  ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr   = PetscFree(bseq);CHKERRQ(ierr);
  /* hand the index sets back to the caller (for later MAT_REUSE_MATRIX calls) or destroy them */
  if (!rowb) {
    ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
  of the OFF-DIAGONAL portion of local A

  Collective on Mat

  Input Parameters:
+ A,B - the matrices in mpiaij format
- scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

  Output Parameter:
+ startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
. startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
. bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
- B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

  Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable.
5667 5668 Level: developer 5669 5670 */ 5671 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5672 { 5673 PetscErrorCode ierr; 5674 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5675 Mat_SeqAIJ *b_oth; 5676 VecScatter ctx; 5677 MPI_Comm comm; 5678 const PetscMPIInt *rprocs,*sprocs; 5679 const PetscInt *srow,*rstarts,*sstarts; 5680 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5681 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len; 5682 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5683 MPI_Request *reqs = NULL,*rwaits = NULL,*swaits = NULL; 5684 PetscMPIInt size,tag,rank,nreqs; 5685 5686 PetscFunctionBegin; 5687 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5688 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 5689 5690 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5691 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5692 } 5693 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5694 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 5695 5696 if (size == 1) { 5697 startsj_s = NULL; 5698 bufa_ptr = NULL; 5699 *B_oth = NULL; 5700 PetscFunctionReturn(0); 5701 } 5702 5703 ctx = a->Mvctx; 5704 tag = ((PetscObject)ctx)->tag; 5705 5706 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5707 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5708 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5709 ierr = PetscMPIIntCast(nsends+nrecvs,&nreqs);CHKERRQ(ierr); 5710 ierr = 
PetscMalloc1(nreqs,&reqs);CHKERRQ(ierr); 5711 rwaits = reqs; 5712 swaits = reqs + nrecvs; 5713 5714 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5715 if (scall == MAT_INITIAL_MATRIX) { 5716 /* i-array */ 5717 /*---------*/ 5718 /* post receives */ 5719 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */ 5720 for (i=0; i<nrecvs; i++) { 5721 rowlen = rvalues + rstarts[i]*rbs; 5722 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5723 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5724 } 5725 5726 /* pack the outgoing message */ 5727 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5728 5729 sstartsj[0] = 0; 5730 rstartsj[0] = 0; 5731 len = 0; /* total length of j or a array to be sent */ 5732 if (nsends) { 5733 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5734 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5735 } 5736 for (i=0; i<nsends; i++) { 5737 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5738 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5739 for (j=0; j<nrows; j++) { 5740 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5741 for (l=0; l<sbs; l++) { 5742 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5743 5744 rowlen[j*sbs+l] = ncols; 5745 5746 len += ncols; 5747 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5748 } 5749 k++; 5750 } 5751 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5752 5753 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5754 } 5755 /* recvs and sends of i-array are completed */ 5756 if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5757 ierr = PetscFree(svalues);CHKERRQ(ierr); 5758 5759 /* allocate buffers 
for sending j and a arrays */ 5760 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5761 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5762 5763 /* create i-array of B_oth */ 5764 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5765 5766 b_othi[0] = 0; 5767 len = 0; /* total length of j or a array to be received */ 5768 k = 0; 5769 for (i=0; i<nrecvs; i++) { 5770 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5771 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5772 for (j=0; j<nrows; j++) { 5773 b_othi[k+1] = b_othi[k] + rowlen[j]; 5774 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5775 k++; 5776 } 5777 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5778 } 5779 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5780 5781 /* allocate space for j and a arrrays of B_oth */ 5782 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5783 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5784 5785 /* j-array */ 5786 /*---------*/ 5787 /* post receives of j-array */ 5788 for (i=0; i<nrecvs; i++) { 5789 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5790 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5791 } 5792 5793 /* pack the outgoing message j-array */ 5794 if (nsends) k = sstarts[0]; 5795 for (i=0; i<nsends; i++) { 5796 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5797 bufJ = bufj+sstartsj[i]; 5798 for (j=0; j<nrows; j++) { 5799 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5800 for (ll=0; ll<sbs; ll++) { 5801 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5802 for (l=0; l<ncols; l++) { 5803 *bufJ++ = cols[l]; 5804 } 5805 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5806 } 5807 } 5808 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5809 } 5810 5811 /* recvs and sends of j-array are 
completed */ 5812 if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5813 } else if (scall == MAT_REUSE_MATRIX) { 5814 sstartsj = *startsj_s; 5815 rstartsj = *startsj_r; 5816 bufa = *bufa_ptr; 5817 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5818 b_otha = b_oth->a; 5819 #if defined(PETSC_HAVE_DEVICE) 5820 (*B_oth)->offloadmask = PETSC_OFFLOAD_CPU; 5821 #endif 5822 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5823 5824 /* a-array */ 5825 /*---------*/ 5826 /* post receives of a-array */ 5827 for (i=0; i<nrecvs; i++) { 5828 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5829 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5830 } 5831 5832 /* pack the outgoing message a-array */ 5833 if (nsends) k = sstarts[0]; 5834 for (i=0; i<nsends; i++) { 5835 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5836 bufA = bufa+sstartsj[i]; 5837 for (j=0; j<nrows; j++) { 5838 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5839 for (ll=0; ll<sbs; ll++) { 5840 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5841 for (l=0; l<ncols; l++) { 5842 *bufA++ = vals[l]; 5843 } 5844 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5845 } 5846 } 5847 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5848 } 5849 /* recvs and sends of a-array are completed */ 5850 if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5851 ierr = PetscFree(reqs);CHKERRQ(ierr); 5852 5853 if (scall == MAT_INITIAL_MATRIX) { 5854 /* put together the new matrix */ 5855 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5856 5857 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. 
*/ 5858 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5859 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5860 b_oth->free_a = PETSC_TRUE; 5861 b_oth->free_ij = PETSC_TRUE; 5862 b_oth->nonew = 0; 5863 5864 ierr = PetscFree(bufj);CHKERRQ(ierr); 5865 if (!startsj_s || !bufa_ptr) { 5866 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5867 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5868 } else { 5869 *startsj_s = sstartsj; 5870 *startsj_r = rstartsj; 5871 *bufa_ptr = bufa; 5872 } 5873 } 5874 5875 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5876 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 5877 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5878 PetscFunctionReturn(0); 5879 } 5880 5881 /*@C 5882 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5883 5884 Not Collective 5885 5886 Input Parameter: 5887 . A - The matrix in mpiaij format 5888 5889 Output Parameters: 5890 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5891 . 
colmap - A map from global column index to local index into lvec 5892 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5893 5894 Level: developer 5895 5896 @*/ 5897 #if defined(PETSC_USE_CTABLE) 5898 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5899 #else 5900 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5901 #endif 5902 { 5903 Mat_MPIAIJ *a; 5904 5905 PetscFunctionBegin; 5906 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5907 PetscValidPointer(lvec, 2); 5908 PetscValidPointer(colmap, 3); 5909 PetscValidPointer(multScatter, 4); 5910 a = (Mat_MPIAIJ*) A->data; 5911 if (lvec) *lvec = a->lvec; 5912 if (colmap) *colmap = a->colmap; 5913 if (multScatter) *multScatter = a->Mvctx; 5914 PetscFunctionReturn(0); 5915 } 5916 5917 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5918 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5919 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5920 #if defined(PETSC_HAVE_MKL_SPARSE) 5921 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5922 #endif 5923 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*); 5924 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5925 #if defined(PETSC_HAVE_ELEMENTAL) 5926 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5927 #endif 5928 #if defined(PETSC_HAVE_SCALAPACK) 5929 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*); 5930 #endif 5931 #if defined(PETSC_HAVE_HYPRE) 5932 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5933 #endif 5934 #if defined(PETSC_HAVE_CUDA) 5935 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*); 5936 
#endif 5937 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 5938 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*); 5939 #endif 5940 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5941 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5942 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 5943 5944 /* 5945 Computes (B'*A')' since computing B*A directly is untenable 5946 5947 n p p 5948 [ ] [ ] [ ] 5949 m [ A ] * n [ B ] = m [ C ] 5950 [ ] [ ] [ ] 5951 5952 */ 5953 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5954 { 5955 PetscErrorCode ierr; 5956 Mat At,Bt,Ct; 5957 5958 PetscFunctionBegin; 5959 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5960 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5961 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr); 5962 ierr = MatDestroy(&At);CHKERRQ(ierr); 5963 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5964 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5965 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5966 PetscFunctionReturn(0); 5967 } 5968 5969 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C) 5970 { 5971 PetscErrorCode ierr; 5972 PetscBool cisdense; 5973 5974 PetscFunctionBegin; 5975 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5976 ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr); 5977 ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr); 5978 ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr); 5979 if (!cisdense) { 5980 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 5981 } 5982 ierr = MatSetUp(C);CHKERRQ(ierr); 5983 5984 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5985 PetscFunctionReturn(0); 5986 } 
5987 5988 /* ----------------------------------------------------------------*/ 5989 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 5990 { 5991 Mat_Product *product = C->product; 5992 Mat A = product->A,B=product->B; 5993 5994 PetscFunctionBegin; 5995 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) 5996 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5997 5998 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 5999 C->ops->productsymbolic = MatProductSymbolic_AB; 6000 PetscFunctionReturn(0); 6001 } 6002 6003 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6004 { 6005 PetscErrorCode ierr; 6006 Mat_Product *product = C->product; 6007 6008 PetscFunctionBegin; 6009 if (product->type == MATPRODUCT_AB) { 6010 ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr); 6011 } 6012 PetscFunctionReturn(0); 6013 } 6014 /* ----------------------------------------------------------------*/ 6015 6016 /*MC 6017 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6018 6019 Options Database Keys: 6020 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 6021 6022 Level: beginner 6023 6024 Notes: 6025 MatSetValues() may be called for this matrix type with a NULL argument for the numerical values, 6026 in this case the values associated with the rows and columns one passes in are set to zero 6027 in the matrix 6028 6029 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. 
In this no 6030 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 6031 6032 .seealso: MatCreateAIJ() 6033 M*/ 6034 6035 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6036 { 6037 Mat_MPIAIJ *b; 6038 PetscErrorCode ierr; 6039 PetscMPIInt size; 6040 6041 PetscFunctionBegin; 6042 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr); 6043 6044 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 6045 B->data = (void*)b; 6046 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 6047 B->assembled = PETSC_FALSE; 6048 B->insertmode = NOT_SET_VALUES; 6049 b->size = size; 6050 6051 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr); 6052 6053 /* build cache for off array entries formed */ 6054 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 6055 6056 b->donotstash = PETSC_FALSE; 6057 b->colmap = NULL; 6058 b->garray = NULL; 6059 b->roworiented = PETSC_TRUE; 6060 6061 /* stuff used for matrix vector multiply */ 6062 b->lvec = NULL; 6063 b->Mvctx = NULL; 6064 6065 /* stuff for MatGetRow() */ 6066 b->rowindices = NULL; 6067 b->rowvalues = NULL; 6068 b->getrowactive = PETSC_FALSE; 6069 6070 /* flexible pointer used in CUSPARSE classes */ 6071 b->spptr = NULL; 6072 6073 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 6074 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 6075 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 6076 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 6077 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 6078 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 6079 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 6080 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 6081 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 6082 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr); 6083 #if defined(PETSC_HAVE_CUDA) 6084 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE);CHKERRQ(ierr); 6085 #endif 6086 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6087 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos);CHKERRQ(ierr); 6088 #endif 6089 #if defined(PETSC_HAVE_MKL_SPARSE) 6090 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 6091 #endif 6092 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 6093 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr); 6094 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 6095 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense);CHKERRQ(ierr); 6096 #if defined(PETSC_HAVE_ELEMENTAL) 6097 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 6098 #endif 6099 #if defined(PETSC_HAVE_SCALAPACK) 6100 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr); 6101 #endif 6102 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 6103 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 6104 #if defined(PETSC_HAVE_HYPRE) 6105 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 6106 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr); 6107 #endif 6108 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr); 6109 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr); 6110 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 6111 PetscFunctionReturn(0); 6112 } 6113 6114 /*@C 6115 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 6116 and "off-diagonal" part of the matrix in CSR format. 6117 6118 Collective 6119 6120 Input Parameters: 6121 + comm - MPI communicator 6122 . m - number of local rows (Cannot be PETSC_DECIDE) 6123 . n - This value should be the same as the local size used in creating the 6124 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 6125 calculated if N is given) For square matrices n is almost always m. 6126 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 6127 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 6128 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6129 . j - column indices 6130 . 
a - matrix values 6131 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6132 . oj - column indices 6133 - oa - matrix values 6134 6135 Output Parameter: 6136 . mat - the matrix 6137 6138 Level: advanced 6139 6140 Notes: 6141 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6142 must free the arrays once the matrix has been destroyed and not before. 6143 6144 The i and j indices are 0 based 6145 6146 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6147 6148 This sets local rows and cannot be used to set off-processor values. 6149 6150 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6151 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6152 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6153 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6154 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 6155 communication if it is known that only local entries will be set. 
6156 6157 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 6158 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 6159 @*/ 6160 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 6161 { 6162 PetscErrorCode ierr; 6163 Mat_MPIAIJ *maij; 6164 6165 PetscFunctionBegin; 6166 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6167 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 6168 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 6169 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 6170 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 6171 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 6172 maij = (Mat_MPIAIJ*) (*mat)->data; 6173 6174 (*mat)->preallocated = PETSC_TRUE; 6175 6176 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 6177 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 6178 6179 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 6180 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 6181 6182 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6183 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6184 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6185 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6186 6187 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 6188 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6189 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6190 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 6191 ierr = 
MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6192 PetscFunctionReturn(0); 6193 } 6194 6195 /* 6196 Special version for direct calls from Fortran 6197 */ 6198 #include <petsc/private/fortranimpl.h> 6199 6200 /* Change these macros so can be used in void function */ 6201 #undef CHKERRQ 6202 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 6203 #undef SETERRQ2 6204 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 6205 #undef SETERRQ3 6206 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 6207 #undef SETERRQ 6208 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 6209 6210 #if defined(PETSC_HAVE_FORTRAN_CAPS) 6211 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 6212 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 6213 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 6214 #else 6215 #endif 6216 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 6217 { 6218 Mat mat = *mmat; 6219 PetscInt m = *mm, n = *mn; 6220 InsertMode addv = *maddv; 6221 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 6222 PetscScalar value; 6223 PetscErrorCode ierr; 6224 6225 MatCheckPreallocated(mat,1); 6226 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 6227 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 6228 { 6229 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 6230 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 6231 PetscBool roworiented = aij->roworiented; 6232 6233 /* Some Variables required in the macro */ 6234 Mat A = aij->A; 6235 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 6236 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 6237 MatScalar *aa = a->a; 6238 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? 
PETSC_TRUE : PETSC_FALSE); 6239 Mat B = aij->B; 6240 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 6241 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 6242 MatScalar *ba = b->a; 6243 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 6244 * cannot use "#if defined" inside a macro. */ 6245 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 6246 6247 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 6248 PetscInt nonew = a->nonew; 6249 MatScalar *ap1,*ap2; 6250 6251 PetscFunctionBegin; 6252 for (i=0; i<m; i++) { 6253 if (im[i] < 0) continue; 6254 if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 6255 if (im[i] >= rstart && im[i] < rend) { 6256 row = im[i] - rstart; 6257 lastcol1 = -1; 6258 rp1 = aj + ai[row]; 6259 ap1 = aa + ai[row]; 6260 rmax1 = aimax[row]; 6261 nrow1 = ailen[row]; 6262 low1 = 0; 6263 high1 = nrow1; 6264 lastcol2 = -1; 6265 rp2 = bj + bi[row]; 6266 ap2 = ba + bi[row]; 6267 rmax2 = bimax[row]; 6268 nrow2 = bilen[row]; 6269 low2 = 0; 6270 high2 = nrow2; 6271 6272 for (j=0; j<n; j++) { 6273 if (roworiented) value = v[i*n+j]; 6274 else value = v[i+j*m]; 6275 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 6276 if (in[j] >= cstart && in[j] < cend) { 6277 col = in[j] - cstart; 6278 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 6279 #if defined(PETSC_HAVE_DEVICE) 6280 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 6281 #endif 6282 } else if (in[j] < 0) continue; 6283 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 6284 /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */ 6285 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 6286 } else { 6287 if (mat->was_assembled) { 6288 if (!aij->colmap) { 6289 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 6290 } 6291 #if defined(PETSC_USE_CTABLE) 6292 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 6293 col--; 6294 #else 6295 col = aij->colmap[in[j]] - 1; 6296 #endif 6297 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 6298 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 6299 col = in[j]; 6300 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 6301 B = aij->B; 6302 b = (Mat_SeqAIJ*)B->data; 6303 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 6304 rp2 = bj + bi[row]; 6305 ap2 = ba + bi[row]; 6306 rmax2 = bimax[row]; 6307 nrow2 = bilen[row]; 6308 low2 = 0; 6309 high2 = nrow2; 6310 bm = aij->B->rmap->n; 6311 ba = b->a; 6312 inserted = PETSC_FALSE; 6313 } 6314 } else col = in[j]; 6315 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 6316 #if defined(PETSC_HAVE_DEVICE) 6317 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 6318 #endif 6319 } 6320 } 6321 } else if (!aij->donotstash) { 6322 if (roworiented) { 6323 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6324 } else { 6325 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6326 } 6327 } 6328 } 6329 } 6330 PetscFunctionReturnVoid(); 6331 } 6332 6333 typedef struct { 6334 Mat *mp; /* intermediate products */ 6335 PetscBool *mptmp; /* is the intermediate product temporary ? 
*/
  PetscInt cp; /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s,*startsj_r; /* send/receive offset arrays used by the row communication */
  PetscScalar *bufa;                 /* send/receive buffer for matrix values */
  Mat          P_oth;                /* rows of the right factor fetched from other processes */

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar  *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt    **own;          /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt    **off;          /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool     hasoffproc;   /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF       sf;           /* used for non-local values insertion and memory malloc */
  PetscMemType  mtype;

  /* customization */
  PetscBool abmerge;    /* for AB products: merge B's diag and off-diag blocks before multiplying */
  PetscBool P_oth_bind; /* bind P_oth to the CPU */
} MatMatMPIAIJBACKEND;

/* Destroy callback for the product data attached by MatProductSymbolic_MPIAIJBACKEND():
   releases the communication buffers, the COO index/value storage, the star forest
   and every intermediate product matrix. */
PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
{
  MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data;
  PetscInt            i;
  PetscErrorCode      ierr;

  PetscFunctionBegin;
  ierr = PetscFree2(mmdata->startsj_s,mmdata->startsj_r);CHKERRQ(ierr);
  ierr = PetscFree(mmdata->bufa);CHKERRQ(ierr);
  /* coo_v and coo_w were obtained with PetscSFMalloc, so release them with PetscSFFree */
  ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v);CHKERRQ(ierr);
  ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w);CHKERRQ(ierr);
  ierr = MatDestroy(&mmdata->P_oth);CHKERRQ(ierr);
  ierr = MatDestroy(&mmdata->Bloc);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&mmdata->sf);CHKERRQ(ierr);
  for (i = 0; i < mmdata->cp; i++) {
    ierr = MatDestroy(&mmdata->mp[i]);CHKERRQ(ierr);
  }
  ierr = PetscFree2(mmdata->mp,mmdata->mptmp);CHKERRQ(ierr);
  /* own[0]/off[0] point to the single big index arrays shared by all own[i]/off[i] */
  ierr = PetscFree(mmdata->own[0]);CHKERRQ(ierr);
  ierr = PetscFree(mmdata->own);CHKERRQ(ierr);
  ierr = PetscFree(mmdata->off[0]);CHKERRQ(ierr);
  ierr = PetscFree(mmdata->off);CHKERRQ(ierr);
  ierr = PetscFree(mmdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Copy the n values of A's value array selected by idx[] into v[]; when idx is NULL
   the leading n values are copied instead. Dispatches to a type-specific (e.g. device
   aware) implementation when the matrix type provides "MatSeqAIJCopySubArray_C". */
static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
{
  PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]);
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(A,n,idx,v);CHKERRQ(ierr);
  } else {
    const PetscScalar *vv;

    ierr = MatSeqAIJGetArrayRead(A,&vv);CHKERRQ(ierr);
    if (n && idx) {
      PetscScalar    *w  = v;
      const PetscInt *oi = idx;
      PetscInt        j;

      for (j = 0; j < n; j++) *w++ = vv[*oi++];
    } else {
      ierr = PetscArraycpy(v,vv,n);CHKERRQ(ierr);
    }
    ierr = MatSeqAIJRestoreArrayRead(A,&vv);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Numeric phase of the backend matrix-matrix product: refresh the temporary inputs,
   recompute the intermediate products and insert their values into C with MatSetValuesCOO(). */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt            i,n_d,n_o;
  PetscErrorCode      ierr;

  PetscFunctionBegin;
  MatCheckProduct(C,1);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  mmdata = (MatMatMPIAIJBACKEND*)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) {
      ierr = MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
    }
    if (mmdata->Bloc) {
      ierr = MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc);CHKERRQ(ierr);
    }
  }
  /* reusesym only lets us skip the update on the first numeric call after the symbolic phase */
  mmdata->reusesym = PETSC_FALSE;

  for (i
= 0; i < mmdata->cp; i++) {
    if (!mmdata->mp[i]->ops->productnumeric) SETERRQ1(PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]);
    ierr = (*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]);CHKERRQ(ierr);
  }
  /* copy the values of the non-temporary intermediate products into the COO buffers:
     coo_w collects entries destined to other processes, coo_v the locally owned ones */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i+1] - mmdata->off[i];

    if (mmdata->mptmp[i]) continue; /* temporary products feed later products only */
    if (noff) {
      PetscInt nown = mmdata->own[i+1] - mmdata->own[i];

      ierr = MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o);CHKERRQ(ierr);
      ierr = MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d);CHKERRQ(ierr);
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data;

      /* every entry of this product is local: copy its full value array */
      ierr = MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d);CHKERRQ(ierr);
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion */
    ierr = PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr);
  }
  ierr = MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Support for Pt * A, A * P, or Pt * A * P */
#define MAX_NUMBER_INTERMEDIATE 4
/* Symbolic phase of the backend matrix-matrix product: build the needed
   intermediate sequential products and the COO preallocation data for C. */
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
{
  Mat_Product            *product = C->product;
  Mat                     A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
  Mat_MPIAIJ             *a,*p;
  MatMatMPIAIJBACKEND    *mmdata;
  ISLocalToGlobalMapping  P_oth_l2g = NULL;
  IS                      glob = NULL;
  const char             *prefix;
  char                    pprefix[256];
  const PetscInt         *globidx,*P_oth_idx;
  PetscInt                i,j,cp,m,n,M,N,ncoo,ncoo_d,ncoo_o,ncoo_oown,*coo_i,*coo_j;
  PetscInt
cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
                                                               /* type-0: consecutive, start from 0; type-1: consecutive with */
                                                               /* a base offset; type-2: sparse with a local to global map table */
  const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */

  MatProductType ptype;
  PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk;
  PetscMPIInt    size;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  MatCheckProduct(C,1);
  if (product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  ptype = product->type;
  /* with a symmetric A, At*B is computed as the cheaper A*B */
  if (product->A->symmetric && ptype == MATPRODUCT_AtB) ptype = MATPRODUCT_AB;
  /* determine sizes of C and whether remote value insertion will be needed */
  switch (ptype) {
  case MATPRODUCT_AB:
    A          = product->A;
    P          = product->B;
    m          = A->rmap->n;
    n          = P->cmap->n;
    M          = A->rmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
    break;
  case MATPRODUCT_AtB:
    P          = product->A;
    A          = product->B;
    m          = P->cmap->n;
    n          = A->cmap->n;
    M          = P->cmap->N;
    N          = A->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  case MATPRODUCT_PtAP:
    A          = product->A;
    P          = product->B;
    m          = P->cmap->n;
    n          = P->cmap->n;
    M          = P->cmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
  }
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)C),&size);CHKERRMPI(ierr);
  if (size == 1) hasoffproc = PETSC_FALSE; /* uniprocessor: everything is local */

  /* defaults */
  for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) {
    mp[i]    = NULL;
    mptmp[i] = PETSC_FALSE;
    rmapt[i] = -1;
    cmapt[i] = -1;
    rmapa[i] = NULL;
    cmapa[i] = NULL;
  }

  /* customization */
  ierr = PetscNew(&mmdata);CHKERRQ(ierr);
  mmdata->reusesym = product->api_user;
  if (ptype == MATPRODUCT_AB) {
    if (product->api_user) {
      ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
      ierr = PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr);
      ierr = PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
      ierr = PetscOptionsEnd();CHKERRQ(ierr);
    } else {
      ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
      ierr = PetscOptionsBool("-matproduct_ab_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr);
      ierr = PetscOptionsBool("-matproduct_ab_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
      ierr = PetscOptionsEnd();CHKERRQ(ierr);
    }
  } else if (ptype == MATPRODUCT_PtAP) {
    if (product->api_user) {
      ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
      ierr = PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
      ierr = PetscOptionsEnd();CHKERRQ(ierr);
    } else {
      ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
      ierr = PetscOptionsBool("-matproduct_ptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
      ierr = PetscOptionsEnd();CHKERRQ(ierr);
    }
  }
  a = (Mat_MPIAIJ*)A->data;
  p = (Mat_MPIAIJ*)P->data;
  ierr = MatSetSizes(C,m,n,M,N);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(C->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(C->cmap);CHKERRQ(ierr);
  ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = MatGetOptionsPrefix(C,&prefix);CHKERRQ(ierr);

  cp = 0;
  switch (ptype) {
  case MATPRODUCT_AB: /* A * P */
    ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);

    /* A_diag * P_local (merged or not) */
    if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
      /* P is product->B */
      ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
      ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else { /* A_diag * P_diag and A_diag * P_off */
      ierr = MatProductCreate(a->A,p->A,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr =
PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      rmapt[cp] = 1;
      cmapt[cp] = 1;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      /* A_diag * P_off: columns are indexed by P's off-diagonal global column map */
      ierr = MatProductCreate(a->A,p->B,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = p->garray;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }

    /* A_off * P_other */
    if (mmdata->P_oth) {
      ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr); /* make P_oth use local col ids */
      ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
      ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr);
      ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr);
      ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;

  case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
    /* A is product->B */
    ierr = MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
    if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
      ierr = MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else {
      /* P_diag^t * A_loc */
      ierr = MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      /* P_off^t * A_loc: rows indexed by P's off-diagonal global column map */
      ierr = MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      rmapt[cp] = 2;
      rmapa[cp] = p->garray;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  case MATPRODUCT_PtAP:
    ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
    /* P is product->B */
    ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
    /* P_loc^t * A_diag * P_loc */
    ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
    ierr = MatProductSetType(mp[cp],MATPRODUCT_PtAP);CHKERRQ(ierr);
    ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
    ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
    ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
    ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
    mp[cp]->product->api_user = product->api_user;
    ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
    if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
    ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
    ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
    rmapt[cp] = 2;
    rmapa[cp] = globidx;
    cmapt[cp] = 2;
    cmapa[cp] = globidx;
    mptmp[cp] = PETSC_FALSE;
    cp++;
    if (mmdata->P_oth) {
      ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr);
      ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
      ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr);
      ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr);
      /* A_off * P_oth, a temporary product consumed by the next product */
      ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      mptmp[cp] = PETSC_TRUE;
      cp++;
      /* P_loc^t * (A_off * P_oth) */
      ierr = MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
  }
  /* sanity check */
  if (size > 1) for (i = 0; i < cp; i++) if (rmapt[i] == 2 && !hasoffproc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %D",i);

  ierr = PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp);CHKERRQ(ierr);
  for (i = 0; i < cp; i++) {
    mmdata->mp[i]    = mp[i];
    mmdata->mptmp[i] = mptmp[i];
  }
  mmdata->cp             = cp;
  C->product->data       = mmdata;
  C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
  C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;

  /* memory type */
  mmdata->mtype = PETSC_MEMTYPE_HOST;
  ierr = PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");CHKERRQ(ierr);
  ierr = PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"");CHKERRQ(ierr);
  if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
  // enable the line below when MatSeqAIJCopySubArray_SeqAIJKokkos is implemented
  //else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_DEVICE;

  /* prepare coo coordinates for values insertion */

  /* count total nonzeros of those intermediate seqaij Mats
     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
  */
  for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
    if (mptmp[cp]) continue;
    if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
      const PetscInt *rmap = rmapa[cp];
      const PetscInt mr    = mp[cp]->rmap->n;
      const PetscInt rs    = C->rmap->rstart;
      const PetscInt re    = C->rmap->rend;
      const PetscInt *ii   = mm->i;
      for (i = 0; i < mr; i++) {
        const PetscInt gr = rmap[i];
        const PetscInt nz = ii[i+1] - ii[i];
        if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
        else ncoo_oown += nz; /* this row is local */
      }
    } else ncoo_d += mm->nz;
  }

  /*
    ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc

    ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other
    procs.

    off[0] points to a big index array, which is shared by off[1,2,...]. Similarily, for own[0].

    off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
    own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
    so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.

    coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
    Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaing part stores i of nonzeros I will receive.
  */
  ierr = PetscCalloc1(mmdata->cp+1,&mmdata->off);CHKERRQ(ierr); /* +1 to make a csr-like data structure */
  ierr = PetscCalloc1(mmdata->cp+1,&mmdata->own);CHKERRQ(ierr);

  /* gather (i,j) of nonzeros inserted by remote procs */
  if (hasoffproc) {
    PetscSF  msf;
    PetscInt ncoo2,*coo_i2,*coo_j2;

    ierr = PetscMalloc1(ncoo_o,&mmdata->off[0]);CHKERRQ(ierr);
    ierr = PetscMalloc1(ncoo_oown,&mmdata->own[0]);CHKERRQ(ierr);
    ierr = PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j);CHKERRQ(ierr); /* to collect (i,j) of entries to be sent to others */

    for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
      Mat_SeqAIJ *mm     = (Mat_SeqAIJ*)mp[cp]->data;
      PetscInt   *idxoff = mmdata->off[cp];
      PetscInt   *idxown = mmdata->own[cp];
      if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
        const PetscInt *rmap = rmapa[cp];
        const PetscInt *cmap = cmapa[cp];
        const PetscInt *ii   = mm->i;
        PetscInt       *coi  = coo_i + ncoo_o;
        PetscInt       *coj  = coo_j + ncoo_o;
        const PetscInt mr    = mp[cp]->rmap->n;
        const PetscInt rs    = C->rmap->rstart;
        const PetscInt re    = C->rmap->rend;
        const PetscInt cs    = C->cmap->rstart;
        for (i = 0; i < mr; i++) {
          const PetscInt *jj = mm->j + ii[i];
          const PetscInt gr  = rmap[i];
          const PetscInt nz  = ii[i+1] - ii[i];
          if (gr < rs || gr >= re) { /* this is an offproc row */
            for (j = ii[i]; j < ii[i+1]; j++) {
              *coi++ = gr;
              *idxoff++ = j;
            }
            if (!cmapt[cp]) { /* already global */
              for (j = 0; j < nz; j++) *coj++ = jj[j];
            } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
              for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
            } else { /* offdiag */
              for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
            }
            ncoo_o += nz;
          } else { /* this is a local row */
            for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j;
          }
        }
      }
      /* csr-like: segment p ends where segment p+1 begins */
      mmdata->off[cp + 1] = idxoff;
      mmdata->own[cp + 1] = idxown;
    }

    ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i);CHKERRQ(ierr);
    ierr = PetscSFGetMultiSF(mmdata->sf,&msf);CHKERRQ(ierr);
    ierr = PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL);CHKERRQ(ierr);
    ncoo = ncoo_d + ncoo_oown + ncoo2;
    ierr = PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2);CHKERRQ(ierr);
    ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); /* put (i,j) of remote nonzeros at back */
    ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
    ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
    ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr);
    /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
    ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w);CHKERRQ(ierr);
    coo_i = coo_i2;
    coo_j = coo_j2;
  } else { /* no offproc values insertion */
    ncoo = ncoo_d;
    ierr = PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j);CHKERRQ(ierr);

    /* an empty SF so the numeric phase can still use PetscSFMalloc/PetscSFFree uniformly */
    ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFSetUp(mmdata->sf);CHKERRQ(ierr);
  }
  mmdata->hasoffproc = hasoffproc;

  /* gather (i,j) of nonzeros inserted locally */
  for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ     *mm   = (Mat_SeqAIJ*)mp[cp]->data;
    PetscInt       *coi  = coo_i + ncoo_d;
    PetscInt       *coj  = coo_j + ncoo_d;
    const PetscInt *jj   = mm->j;
    const PetscInt *ii   = mm->i;
    const PetscInt *cmap = cmapa[cp];
    const PetscInt *rmap = rmapa[cp];
    const PetscInt mr    = mp[cp]->rmap->n;
    const PetscInt rs    = C->rmap->rstart;
    const PetscInt re    = C->rmap->rend;
    const PetscInt cs    = C->cmap->rstart;

    if (mptmp[cp]) continue;
    if (rmapt[cp] == 1) { /* consecutive rows */
      /* fill coo_i */
      for (i = 0; i < mr; i++) {
        const PetscInt gr = i + rs;
        for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr;
      }
      /* fill coo_j */
      if (!cmapt[cp]) { /* type-0, already global */
        ierr = PetscArraycpy(coj,jj,mm->nz);CHKERRQ(ierr);
      } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
        for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
      } else { /* type-2, local to global for sparse columns */
        for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
      }
      ncoo_d += mm->nz;
    } else if (rmapt[cp] == 2) { /* sparse rows */
      for (i = 0; i < mr; i++) {
        const PetscInt *jj = mm->j + ii[i];
        const PetscInt gr  = rmap[i];
        const PetscInt nz  = ii[i+1] - ii[i];
        if (gr >= rs && gr < re) { /* local rows */
          for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr;
          if (!cmapt[cp]) { /* type-0, already global */
            for (j = 0; j < nz; j++) *coj++ = jj[j];
          } else if (cmapt[cp]
== 1) { /* local to global for owned columns of C */
            for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
          } else { /* type-2, local to global for sparse columns */
            for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
          }
          ncoo_d += nz;
        }
      }
    }
  }
  if (glob) {
    ierr = ISRestoreIndices(glob,&globidx);CHKERRQ(ierr);
  }
  ierr = ISDestroy(&glob);CHKERRQ(ierr);
  if (P_oth_l2g) {
    ierr = ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
  }
  ierr = ISLocalToGlobalMappingDestroy(&P_oth_l2g);CHKERRQ(ierr);
  /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
  ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v);CHKERRQ(ierr);

  /* preallocate with COO data */
  ierr = MatSetPreallocationCOO(C,ncoo,coo_i,coo_j);CHKERRQ(ierr);
  ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Select the backend product implementation for mat when A and B have matching
   (device capable) types and the user did not request the CPU code path via the
   *_backend_cpu options; otherwise fall back to the plain MPIAIJ implementation. */
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product    *product = mat->product;
  PetscErrorCode ierr;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool match  = PETSC_FALSE;
  PetscBool usecpu = PETSC_FALSE;
#else
  PetscBool match = PETSC_TRUE;
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat,1);
#if defined(PETSC_HAVE_DEVICE)
  if (!product->A->boundtocpu && !product->B->boundtocpu) {
    ierr = PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match);CHKERRQ(ierr);
  }
  if (match) { /* we can always fallback to the CPU if requested */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
        ierr = PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
        ierr = PetscOptionsEnd();CHKERRQ(ierr);
      } else {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
        ierr = PetscOptionsBool("-matproduct_ab_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
        ierr = PetscOptionsEnd();CHKERRQ(ierr);
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");CHKERRQ(ierr);
        ierr = PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
        ierr = PetscOptionsEnd();CHKERRQ(ierr);
      } else {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");CHKERRQ(ierr);
        ierr = PetscOptionsBool("-matproduct_atb_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
        ierr = PetscOptionsEnd();CHKERRQ(ierr);
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
        ierr = PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr);
        ierr = PetscOptionsEnd();CHKERRQ(ierr);
      } else {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
        ierr = PetscOptionsBool("-matproduct_ptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr);
        ierr = PetscOptionsEnd();CHKERRQ(ierr);
      }
      break;
    default:
      break;
    }
    match = (PetscBool)!usecpu;
  }
#endif
  if (match) {
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default:
      break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) {
    ierr = MatProductSetFromOptions_MPIAIJ(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}