1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/sfimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 /*MC 10 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 11 12 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 13 and MATMPIAIJ otherwise. As a result, for single process communicators, 14 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation() is supported 15 for communicators controlling multiple processes. It is recommended that you call both of 16 the above preallocation routines for simplicity. 17 18 Options Database Keys: 19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 20 21 Developer Notes: 22 Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when 23 enough exist. 24 25 Level: beginner 26 27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ 28 M*/ 29 30 /*MC 31 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 32 33 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 34 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 35 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 36 for communicators controlling multiple processes. It is recommended that you call both of 37 the above preallocation routines for simplicity. 38 39 Options Database Keys: 40 . 
-mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 41 42 Level: beginner 43 44 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 45 M*/ 46 47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg) 48 { 49 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 50 PetscErrorCode ierr; 51 52 PetscFunctionBegin; 53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL) 54 A->boundtocpu = flg; 55 #endif 56 if (a->A) { 57 ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr); 58 } 59 if (a->B) { 60 ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr); 61 } 62 PetscFunctionReturn(0); 63 } 64 65 66 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 67 { 68 PetscErrorCode ierr; 69 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 70 71 PetscFunctionBegin; 72 if (mat->A) { 73 ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr); 74 ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr); 75 } 76 PetscFunctionReturn(0); 77 } 78 79 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 80 { 81 PetscErrorCode ierr; 82 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 83 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 84 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 85 const PetscInt *ia,*ib; 86 const MatScalar *aa,*bb,*aav,*bav; 87 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 88 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 89 90 PetscFunctionBegin; 91 *keptrows = NULL; 92 93 ia = a->i; 94 ib = b->i; 95 ierr = MatSeqAIJGetArrayRead(mat->A,&aav);CHKERRQ(ierr); 96 ierr = MatSeqAIJGetArrayRead(mat->B,&bav);CHKERRQ(ierr); 97 for (i=0; i<m; i++) { 98 na = ia[i+1] - ia[i]; 99 nb = ib[i+1] - ib[i]; 100 if (!na && !nb) { 101 cnt++; 102 goto ok1; 103 } 104 aa = aav + ia[i]; 105 for (j=0; j<na; j++) { 106 if (aa[j] != 0.0) goto ok1; 107 } 108 bb = bav + ib[i]; 109 for (j=0; j <nb; j++) { 110 if (bb[j] != 0.0) goto ok1; 111 } 112 cnt++; 113 ok1:; 114 } 115 ierr = 
MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 116 if (!n0rows) { 117 ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr); 118 ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr); 119 PetscFunctionReturn(0); 120 } 121 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 122 cnt = 0; 123 for (i=0; i<m; i++) { 124 na = ia[i+1] - ia[i]; 125 nb = ib[i+1] - ib[i]; 126 if (!na && !nb) continue; 127 aa = aav + ia[i]; 128 for (j=0; j<na;j++) { 129 if (aa[j] != 0.0) { 130 rows[cnt++] = rstart + i; 131 goto ok2; 132 } 133 } 134 bb = bav + ib[i]; 135 for (j=0; j<nb; j++) { 136 if (bb[j] != 0.0) { 137 rows[cnt++] = rstart + i; 138 goto ok2; 139 } 140 } 141 ok2:; 142 } 143 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 144 ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr); 145 ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr); 146 PetscFunctionReturn(0); 147 } 148 149 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 150 { 151 PetscErrorCode ierr; 152 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 153 PetscBool cong; 154 155 PetscFunctionBegin; 156 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 157 if (Y->assembled && cong) { 158 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 159 } else { 160 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 161 } 162 PetscFunctionReturn(0); 163 } 164 165 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 166 { 167 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 168 PetscErrorCode ierr; 169 PetscInt i,rstart,nrows,*rows; 170 171 PetscFunctionBegin; 172 *zrows = NULL; 173 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 174 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 175 for (i=0; i<nrows; i++) rows[i] += rstart; 176 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 177 
PetscFunctionReturn(0); 178 } 179 180 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 181 { 182 PetscErrorCode ierr; 183 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 184 PetscInt i,n,*garray = aij->garray; 185 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 186 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 187 PetscReal *work; 188 const PetscScalar *dummy; 189 190 PetscFunctionBegin; 191 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 192 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 193 ierr = MatSeqAIJGetArrayRead(aij->A,&dummy);CHKERRQ(ierr); 194 ierr = MatSeqAIJRestoreArrayRead(aij->A,&dummy);CHKERRQ(ierr); 195 ierr = MatSeqAIJGetArrayRead(aij->B,&dummy);CHKERRQ(ierr); 196 ierr = MatSeqAIJRestoreArrayRead(aij->B,&dummy);CHKERRQ(ierr); 197 if (type == NORM_2) { 198 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 199 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 200 } 201 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 202 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 203 } 204 } else if (type == NORM_1) { 205 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 206 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 207 } 208 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 209 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 210 } 211 } else if (type == NORM_INFINITY) { 212 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 213 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 214 } 215 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 216 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 217 } 218 219 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 220 if (type == NORM_INFINITY) { 221 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 222 } else { 223 ierr = 
MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 224 } 225 ierr = PetscFree(work);CHKERRQ(ierr); 226 if (type == NORM_2) { 227 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 228 } 229 PetscFunctionReturn(0); 230 } 231 232 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 233 { 234 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 235 IS sis,gis; 236 PetscErrorCode ierr; 237 const PetscInt *isis,*igis; 238 PetscInt n,*iis,nsis,ngis,rstart,i; 239 240 PetscFunctionBegin; 241 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 242 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 243 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 244 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 245 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 246 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 247 248 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 249 ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr); 250 ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr); 251 n = ngis + nsis; 252 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 253 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 254 for (i=0; i<n; i++) iis[i] += rstart; 255 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 256 257 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 258 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 259 ierr = ISDestroy(&sis);CHKERRQ(ierr); 260 ierr = ISDestroy(&gis);CHKERRQ(ierr); 261 PetscFunctionReturn(0); 262 } 263 264 /* 265 Local utility routine that creates a mapping from the global column 266 number to the local number in the off-diagonal part of the local 267 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 268 a slightly higher hash table cost; without it it is not scalable (each processor 269 has an order N integer array but is fast to access. 
270 */ 271 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 272 { 273 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 274 PetscErrorCode ierr; 275 PetscInt n = aij->B->cmap->n,i; 276 277 PetscFunctionBegin; 278 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 279 #if defined(PETSC_USE_CTABLE) 280 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 281 for (i=0; i<n; i++) { 282 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 283 } 284 #else 285 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 286 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 287 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 288 #endif 289 PetscFunctionReturn(0); 290 } 291 292 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 293 { \ 294 if (col <= lastcol1) low1 = 0; \ 295 else high1 = nrow1; \ 296 lastcol1 = col;\ 297 while (high1-low1 > 5) { \ 298 t = (low1+high1)/2; \ 299 if (rp1[t] > col) high1 = t; \ 300 else low1 = t; \ 301 } \ 302 for (_i=low1; _i<high1; _i++) { \ 303 if (rp1[_i] > col) break; \ 304 if (rp1[_i] == col) { \ 305 if (addv == ADD_VALUES) { \ 306 ap1[_i] += value; \ 307 /* Not sure LogFlops will slow dow the code or not */ \ 308 (void)PetscLogFlops(1.0); \ 309 } \ 310 else ap1[_i] = value; \ 311 inserted = PETSC_TRUE; \ 312 goto a_noinsert; \ 313 } \ 314 } \ 315 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 316 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 317 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 318 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 319 N = nrow1++ - 1; a->nz++; high1++; \ 320 /* shift up all the later entries in this row */ \ 321 ierr = 
PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\ 322 ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\ 323 rp1[_i] = col; \ 324 ap1[_i] = value; \ 325 A->nonzerostate++;\ 326 a_noinsert: ; \ 327 ailen[row] = nrow1; \ 328 } 329 330 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 331 { \ 332 if (col <= lastcol2) low2 = 0; \ 333 else high2 = nrow2; \ 334 lastcol2 = col; \ 335 while (high2-low2 > 5) { \ 336 t = (low2+high2)/2; \ 337 if (rp2[t] > col) high2 = t; \ 338 else low2 = t; \ 339 } \ 340 for (_i=low2; _i<high2; _i++) { \ 341 if (rp2[_i] > col) break; \ 342 if (rp2[_i] == col) { \ 343 if (addv == ADD_VALUES) { \ 344 ap2[_i] += value; \ 345 (void)PetscLogFlops(1.0); \ 346 } \ 347 else ap2[_i] = value; \ 348 inserted = PETSC_TRUE; \ 349 goto b_noinsert; \ 350 } \ 351 } \ 352 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 353 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 354 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 355 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 356 N = nrow2++ - 1; b->nz++; high2++; \ 357 /* shift up all the later entries in this row */ \ 358 ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\ 359 ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\ 360 rp2[_i] = col; \ 361 ap2[_i] = value; \ 362 B->nonzerostate++; \ 363 b_noinsert: ; \ 364 bilen[row] = nrow2; \ 365 } 366 367 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 368 { 369 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 370 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 371 PetscErrorCode ierr; 372 PetscInt l,*garray = mat->garray,diag; 373 374 PetscFunctionBegin; 375 /* code only works for square matrices A */ 376 377 /* find size of row to the left of the diagonal 
part */ 378 ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr); 379 row = row - diag; 380 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 381 if (garray[b->j[b->i[row]+l]] > diag) break; 382 } 383 ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr); 384 385 /* diagonal part */ 386 ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr); 387 388 /* right of diagonal part */ 389 ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr); 390 #if defined(PETSC_HAVE_DEVICE) 391 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU; 392 #endif 393 PetscFunctionReturn(0); 394 } 395 396 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 397 { 398 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 399 PetscScalar value = 0.0; 400 PetscErrorCode ierr; 401 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 402 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 403 PetscBool roworiented = aij->roworiented; 404 405 /* Some Variables required in the macro */ 406 Mat A = aij->A; 407 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 408 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 409 PetscBool ignorezeroentries = a->ignorezeroentries; 410 Mat B = aij->B; 411 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 412 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 413 MatScalar *aa,*ba; 414 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 415 * cannot use "#if defined" inside a macro. 
*/ 416 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 417 418 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 419 PetscInt nonew; 420 MatScalar *ap1,*ap2; 421 422 PetscFunctionBegin; 423 #if defined(PETSC_HAVE_DEVICE) 424 if (A->offloadmask == PETSC_OFFLOAD_GPU) { 425 const PetscScalar *dummy; 426 ierr = MatSeqAIJGetArrayRead(A,&dummy);CHKERRQ(ierr); 427 ierr = MatSeqAIJRestoreArrayRead(A,&dummy);CHKERRQ(ierr); 428 } 429 if (B->offloadmask == PETSC_OFFLOAD_GPU) { 430 const PetscScalar *dummy; 431 ierr = MatSeqAIJGetArrayRead(B,&dummy);CHKERRQ(ierr); 432 ierr = MatSeqAIJRestoreArrayRead(B,&dummy);CHKERRQ(ierr); 433 } 434 #endif 435 aa = a->a; 436 ba = b->a; 437 for (i=0; i<m; i++) { 438 if (im[i] < 0) continue; 439 if (PetscUnlikely(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 440 if (im[i] >= rstart && im[i] < rend) { 441 row = im[i] - rstart; 442 lastcol1 = -1; 443 rp1 = aj + ai[row]; 444 ap1 = aa + ai[row]; 445 rmax1 = aimax[row]; 446 nrow1 = ailen[row]; 447 low1 = 0; 448 high1 = nrow1; 449 lastcol2 = -1; 450 rp2 = bj + bi[row]; 451 ap2 = ba + bi[row]; 452 rmax2 = bimax[row]; 453 nrow2 = bilen[row]; 454 low2 = 0; 455 high2 = nrow2; 456 457 for (j=0; j<n; j++) { 458 if (v) value = roworiented ? 
v[i*n+j] : v[i+j*m]; 459 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 460 if (in[j] >= cstart && in[j] < cend) { 461 col = in[j] - cstart; 462 nonew = a->nonew; 463 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 464 #if defined(PETSC_HAVE_DEVICE) 465 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 466 #endif 467 } else if (in[j] < 0) continue; 468 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 469 else { 470 if (mat->was_assembled) { 471 if (!aij->colmap) { 472 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 473 } 474 #if defined(PETSC_USE_CTABLE) 475 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 476 col--; 477 #else 478 col = aij->colmap[in[j]] - 1; 479 #endif 480 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 481 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 482 col = in[j]; 483 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 484 B = aij->B; 485 b = (Mat_SeqAIJ*)B->data; 486 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 487 rp2 = bj + bi[row]; 488 ap2 = ba + bi[row]; 489 rmax2 = bimax[row]; 490 nrow2 = bilen[row]; 491 low2 = 0; 492 high2 = nrow2; 493 bm = aij->B->rmap->n; 494 ba = b->a; 495 inserted = PETSC_FALSE; 496 } else if (col < 0) { 497 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 498 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 499 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 500 } 501 } else col = in[j]; 502 nonew = b->nonew; 503 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 504 #if defined(PETSC_HAVE_DEVICE) 505 if 
(B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 506 #endif 507 } 508 } 509 } else { 510 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 511 if (!aij->donotstash) { 512 mat->assembled = PETSC_FALSE; 513 if (roworiented) { 514 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 515 } else { 516 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 517 } 518 } 519 } 520 } 521 PetscFunctionReturn(0); 522 } 523 524 /* 525 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 526 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 527 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 
528 */ 529 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[]) 530 { 531 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 532 Mat A = aij->A; /* diagonal part of the matrix */ 533 Mat B = aij->B; /* offdiagonal part of the matrix */ 534 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 535 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 536 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,col; 537 PetscInt *ailen = a->ilen,*aj = a->j; 538 PetscInt *bilen = b->ilen,*bj = b->j; 539 PetscInt am = aij->A->rmap->n,j; 540 PetscInt diag_so_far = 0,dnz; 541 PetscInt offd_so_far = 0,onz; 542 543 PetscFunctionBegin; 544 /* Iterate over all rows of the matrix */ 545 for (j=0; j<am; j++) { 546 dnz = onz = 0; 547 /* Iterate over all non-zero columns of the current row */ 548 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 549 /* If column is in the diagonal */ 550 if (mat_j[col] >= cstart && mat_j[col] < cend) { 551 aj[diag_so_far++] = mat_j[col] - cstart; 552 dnz++; 553 } else { /* off-diagonal entries */ 554 bj[offd_so_far++] = mat_j[col]; 555 onz++; 556 } 557 } 558 ailen[j] = dnz; 559 bilen[j] = onz; 560 } 561 PetscFunctionReturn(0); 562 } 563 564 /* 565 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 566 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 567 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 568 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 569 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 
570 */ 571 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[]) 572 { 573 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 574 Mat A = aij->A; /* diagonal part of the matrix */ 575 Mat B = aij->B; /* offdiagonal part of the matrix */ 576 Mat_SeqAIJ *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data; 577 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 578 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 579 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend; 580 PetscInt *ailen = a->ilen,*aj = a->j; 581 PetscInt *bilen = b->ilen,*bj = b->j; 582 PetscInt am = aij->A->rmap->n,j; 583 PetscInt *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */ 584 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 585 PetscScalar *aa = a->a,*ba = b->a; 586 587 PetscFunctionBegin; 588 /* Iterate over all rows of the matrix */ 589 for (j=0; j<am; j++) { 590 dnz_row = onz_row = 0; 591 rowstart_offd = full_offd_i[j]; 592 rowstart_diag = full_diag_i[j]; 593 /* Iterate over all non-zero columns of the current row */ 594 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 595 /* If column is in the diagonal */ 596 if (mat_j[col] >= cstart && mat_j[col] < cend) { 597 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 598 aa[rowstart_diag+dnz_row] = mat_a[col]; 599 dnz_row++; 600 } else { /* off-diagonal entries */ 601 bj[rowstart_offd+onz_row] = mat_j[col]; 602 ba[rowstart_offd+onz_row] = mat_a[col]; 603 onz_row++; 604 } 605 } 606 ailen[j] = dnz_row; 607 bilen[j] = onz_row; 608 } 609 PetscFunctionReturn(0); 610 } 611 612 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 613 { 614 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 615 PetscErrorCode ierr; 616 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 617 PetscInt cstart = mat->cmap->rstart,cend = 
mat->cmap->rend,row,col; 618 619 PetscFunctionBegin; 620 for (i=0; i<m; i++) { 621 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 622 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 623 if (idxm[i] >= rstart && idxm[i] < rend) { 624 row = idxm[i] - rstart; 625 for (j=0; j<n; j++) { 626 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 627 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 628 if (idxn[j] >= cstart && idxn[j] < cend) { 629 col = idxn[j] - cstart; 630 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 631 } else { 632 if (!aij->colmap) { 633 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 634 } 635 #if defined(PETSC_USE_CTABLE) 636 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 637 col--; 638 #else 639 col = aij->colmap[idxn[j]] - 1; 640 #endif 641 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 642 else { 643 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 644 } 645 } 646 } 647 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 648 } 649 PetscFunctionReturn(0); 650 } 651 652 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 653 { 654 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 655 PetscErrorCode ierr; 656 PetscInt nstash,reallocs; 657 658 PetscFunctionBegin; 659 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 660 661 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 662 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 663 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 664 
PetscFunctionReturn(0); 665 } 666 667 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 668 { 669 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 670 PetscErrorCode ierr; 671 PetscMPIInt n; 672 PetscInt i,j,rstart,ncols,flg; 673 PetscInt *row,*col; 674 PetscBool other_disassembled; 675 PetscScalar *val; 676 677 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 678 679 PetscFunctionBegin; 680 if (!aij->donotstash && !mat->nooffprocentries) { 681 while (1) { 682 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 683 if (!flg) break; 684 685 for (i=0; i<n;) { 686 /* Now identify the consecutive vals belonging to the same row */ 687 for (j=i,rstart=row[j]; j<n; j++) { 688 if (row[j] != rstart) break; 689 } 690 if (j < n) ncols = j-i; 691 else ncols = n-i; 692 /* Now assemble all these values with a single function call */ 693 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 694 i = j; 695 } 696 } 697 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 698 } 699 #if defined(PETSC_HAVE_DEVICE) 700 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 701 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 702 if (mat->boundtocpu) { 703 ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr); 704 ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr); 705 } 706 #endif 707 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 708 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 709 710 /* determine if any processor has disassembled, if so we must 711 also disassemble ourself, in order that we may reassemble. 
*/ 712 /* 713 if nonzero structure of submatrix B cannot change then we know that 714 no processor disassembled thus we can skip this stuff 715 */ 716 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 717 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 718 if (mat->was_assembled && !other_disassembled) { 719 #if defined(PETSC_HAVE_DEVICE) 720 aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */ 721 #endif 722 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 723 } 724 } 725 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 726 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 727 } 728 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 729 #if defined(PETSC_HAVE_DEVICE) 730 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 731 #endif 732 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 733 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 734 735 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 736 737 aij->rowvalues = NULL; 738 739 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 740 741 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 742 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 743 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 744 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 745 } 746 #if defined(PETSC_HAVE_DEVICE) 747 mat->offloadmask = PETSC_OFFLOAD_BOTH; 748 #endif 749 PetscFunctionReturn(0); 750 } 751 752 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 753 { 754 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 755 PetscErrorCode ierr; 756 757 PetscFunctionBegin; 758 ierr = 
MatZeroEntries(l->A);CHKERRQ(ierr); 759 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 760 PetscFunctionReturn(0); 761 } 762 763 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 764 { 765 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 766 PetscObjectState sA, sB; 767 PetscInt *lrows; 768 PetscInt r, len; 769 PetscBool cong, lch, gch; 770 PetscErrorCode ierr; 771 772 PetscFunctionBegin; 773 /* get locally owned rows */ 774 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 775 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 776 /* fix right hand side if needed */ 777 if (x && b) { 778 const PetscScalar *xx; 779 PetscScalar *bb; 780 781 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 782 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 783 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 784 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 785 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 786 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 787 } 788 789 sA = mat->A->nonzerostate; 790 sB = mat->B->nonzerostate; 791 792 if (diag != 0.0 && cong) { 793 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 794 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 795 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 796 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 797 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 798 PetscInt nnwA, nnwB; 799 PetscBool nnzA, nnzB; 800 801 nnwA = aijA->nonew; 802 nnwB = aijB->nonew; 803 nnzA = aijA->keepnonzeropattern; 804 nnzB = aijB->keepnonzeropattern; 805 if (!nnzA) { 806 ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr); 807 aijA->nonew = 0; 808 } 809 if (!nnzB) { 810 ierr = 
PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr); 811 aijB->nonew = 0; 812 } 813 /* Must zero here before the next loop */ 814 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 815 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 816 for (r = 0; r < len; ++r) { 817 const PetscInt row = lrows[r] + A->rmap->rstart; 818 if (row >= A->cmap->N) continue; 819 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 820 } 821 aijA->nonew = nnwA; 822 aijB->nonew = nnwB; 823 } else { 824 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 825 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 826 } 827 ierr = PetscFree(lrows);CHKERRQ(ierr); 828 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 829 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 830 831 /* reduce nonzerostate */ 832 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 833 ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 834 if (gch) A->nonzerostate++; 835 PetscFunctionReturn(0); 836 } 837 838 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 839 { 840 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 841 PetscErrorCode ierr; 842 PetscMPIInt n = A->rmap->n; 843 PetscInt i,j,r,m,len = 0; 844 PetscInt *lrows,*owners = A->rmap->range; 845 PetscMPIInt p = 0; 846 PetscSFNode *rrows; 847 PetscSF sf; 848 const PetscScalar *xx; 849 PetscScalar *bb,*mask; 850 Vec xmask,lmask; 851 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 852 const PetscInt *aj, *ii,*ridx; 853 PetscScalar *aa; 854 855 PetscFunctionBegin; 856 /* Create SF where leaves are input rows and roots are owned rows */ 857 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 858 for (r = 0; r < n; ++r) lrows[r] = -1; 859 ierr 
= PetscMalloc1(N, &rrows);CHKERRQ(ierr); 860 for (r = 0; r < N; ++r) { 861 const PetscInt idx = rows[r]; 862 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 863 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 864 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 865 } 866 rrows[r].rank = p; 867 rrows[r].index = rows[r] - owners[p]; 868 } 869 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 870 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 871 /* Collect flags for rows to be zeroed */ 872 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 873 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 874 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 875 /* Compress and put in row numbers */ 876 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 877 /* zero diagonal part of matrix */ 878 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 879 /* handle off diagonal part of matrix */ 880 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 881 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 882 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 883 for (i=0; i<len; i++) bb[lrows[i]] = 1; 884 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 885 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 886 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 887 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 888 if (x && b) { /* this code is buggy when the row and column layout don't match */ 889 PetscBool cong; 890 891 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 892 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 893 ierr = 
VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 894 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 895 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 896 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 897 } 898 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 899 /* remove zeroed rows of off diagonal matrix */ 900 ii = aij->i; 901 for (i=0; i<len; i++) { 902 ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr); 903 } 904 /* loop over all elements of off process part of matrix zeroing removed columns*/ 905 if (aij->compressedrow.use) { 906 m = aij->compressedrow.nrows; 907 ii = aij->compressedrow.i; 908 ridx = aij->compressedrow.rindex; 909 for (i=0; i<m; i++) { 910 n = ii[i+1] - ii[i]; 911 aj = aij->j + ii[i]; 912 aa = aij->a + ii[i]; 913 914 for (j=0; j<n; j++) { 915 if (PetscAbsScalar(mask[*aj])) { 916 if (b) bb[*ridx] -= *aa*xx[*aj]; 917 *aa = 0.0; 918 } 919 aa++; 920 aj++; 921 } 922 ridx++; 923 } 924 } else { /* do not use compressed row format */ 925 m = l->B->rmap->n; 926 for (i=0; i<m; i++) { 927 n = ii[i+1] - ii[i]; 928 aj = aij->j + ii[i]; 929 aa = aij->a + ii[i]; 930 for (j=0; j<n; j++) { 931 if (PetscAbsScalar(mask[*aj])) { 932 if (b) bb[i] -= *aa*xx[*aj]; 933 *aa = 0.0; 934 } 935 aa++; 936 aj++; 937 } 938 } 939 } 940 if (x && b) { 941 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 942 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 943 } 944 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 945 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 946 ierr = PetscFree(lrows);CHKERRQ(ierr); 947 948 /* only change matrix nonzero state if pattern was allowed to be changed */ 949 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 950 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 951 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 952 } 953 PetscFunctionReturn(0); 954 } 
955 956 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 957 { 958 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 959 PetscErrorCode ierr; 960 PetscInt nt; 961 VecScatter Mvctx = a->Mvctx; 962 963 PetscFunctionBegin; 964 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 965 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 966 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 967 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 968 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 969 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 970 PetscFunctionReturn(0); 971 } 972 973 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 974 { 975 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 976 PetscErrorCode ierr; 977 978 PetscFunctionBegin; 979 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 980 PetscFunctionReturn(0); 981 } 982 983 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 984 { 985 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 986 PetscErrorCode ierr; 987 VecScatter Mvctx = a->Mvctx; 988 989 PetscFunctionBegin; 990 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 991 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 992 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 993 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 994 PetscFunctionReturn(0); 995 } 996 997 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 998 { 999 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1000 PetscErrorCode ierr; 1001 1002 PetscFunctionBegin; 1003 /* do nondiagonal part */ 1004 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1005 /* do local part */ 1006 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1007 /* add partial results together */ 1008 ierr = 
VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1009 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1010 PetscFunctionReturn(0); 1011 } 1012 1013 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1014 { 1015 MPI_Comm comm; 1016 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1017 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1018 IS Me,Notme; 1019 PetscErrorCode ierr; 1020 PetscInt M,N,first,last,*notme,i; 1021 PetscBool lf; 1022 PetscMPIInt size; 1023 1024 PetscFunctionBegin; 1025 /* Easy test: symmetric diagonal block */ 1026 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1027 ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr); 1028 ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr); 1029 if (!*f) PetscFunctionReturn(0); 1030 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1031 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 1032 if (size == 1) PetscFunctionReturn(0); 1033 1034 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. 
*/ 1035 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1036 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1037 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1038 for (i=0; i<first; i++) notme[i] = i; 1039 for (i=last; i<M; i++) notme[i-last+first] = i; 1040 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1041 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1042 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1043 Aoff = Aoffs[0]; 1044 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1045 Boff = Boffs[0]; 1046 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1047 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1048 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1049 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1050 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1051 ierr = PetscFree(notme);CHKERRQ(ierr); 1052 PetscFunctionReturn(0); 1053 } 1054 1055 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1056 { 1057 PetscErrorCode ierr; 1058 1059 PetscFunctionBegin; 1060 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1061 PetscFunctionReturn(0); 1062 } 1063 1064 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1065 { 1066 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1067 PetscErrorCode ierr; 1068 1069 PetscFunctionBegin; 1070 /* do nondiagonal part */ 1071 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1072 /* do local part */ 1073 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1074 /* add partial results together */ 1075 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1076 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1077 PetscFunctionReturn(0); 1078 } 1079 1080 /* 1081 This only works correctly for square matrices where the subblock A->A is 
the 1082 diagonal block 1083 */ 1084 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1085 { 1086 PetscErrorCode ierr; 1087 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1088 1089 PetscFunctionBegin; 1090 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1091 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1092 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1093 PetscFunctionReturn(0); 1094 } 1095 1096 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1097 { 1098 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1099 PetscErrorCode ierr; 1100 1101 PetscFunctionBegin; 1102 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1103 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1104 PetscFunctionReturn(0); 1105 } 1106 1107 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1108 { 1109 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1110 PetscErrorCode ierr; 1111 1112 PetscFunctionBegin; 1113 #if defined(PETSC_USE_LOG) 1114 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1115 #endif 1116 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1117 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1118 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1119 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1120 #if defined(PETSC_USE_CTABLE) 1121 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1122 #else 1123 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1124 #endif 1125 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1126 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1127 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1128 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1129 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1130 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1131 1132 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 1133 ierr = 
PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr); 1134 1135 ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr); 1136 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1137 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1138 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1139 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1140 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1141 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1142 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1143 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr); 1144 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1145 #if defined(PETSC_HAVE_CUDA) 1146 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL);CHKERRQ(ierr); 1147 #endif 1148 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 1149 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL);CHKERRQ(ierr); 1150 #endif 1151 #if defined(PETSC_HAVE_ELEMENTAL) 1152 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1153 #endif 1154 #if defined(PETSC_HAVE_SCALAPACK) 1155 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr); 1156 #endif 1157 #if defined(PETSC_HAVE_HYPRE) 1158 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1159 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1160 #endif 1161 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1162 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr); 1163 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1164 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL);CHKERRQ(ierr); 1165 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL);CHKERRQ(ierr); 1166 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL);CHKERRQ(ierr); 1167 #if defined(PETSC_HAVE_MKL_SPARSE) 1168 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);CHKERRQ(ierr); 1169 #endif 1170 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);CHKERRQ(ierr); 1171 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1172 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);CHKERRQ(ierr); 1173 PetscFunctionReturn(0); 1174 } 1175 1176 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1177 { 1178 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1179 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1180 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1181 const PetscInt *garray = aij->garray; 1182 const PetscScalar *aa,*ba; 1183 PetscInt header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb; 1184 PetscInt *rowlens; 1185 PetscInt *colidxs; 1186 PetscScalar *matvals; 1187 PetscErrorCode ierr; 1188 1189 PetscFunctionBegin; 1190 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1191 1192 M = mat->rmap->N; 1193 N = mat->cmap->N; 1194 m = mat->rmap->n; 1195 rs = mat->rmap->rstart; 1196 cs = mat->cmap->rstart; 1197 nz = 
A->nz + B->nz; 1198 1199 /* write matrix header */ 1200 header[0] = MAT_FILE_CLASSID; 1201 header[1] = M; header[2] = N; header[3] = nz; 1202 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1203 ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 1204 1205 /* fill in and store row lengths */ 1206 ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1207 for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1208 ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr); 1209 ierr = PetscFree(rowlens);CHKERRQ(ierr); 1210 1211 /* fill in and store column indices */ 1212 ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1213 for (cnt=0, i=0; i<m; i++) { 1214 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1215 if (garray[B->j[jb]] > cs) break; 1216 colidxs[cnt++] = garray[B->j[jb]]; 1217 } 1218 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1219 colidxs[cnt++] = A->j[ja] + cs; 1220 for (; jb<B->i[i+1]; jb++) 1221 colidxs[cnt++] = garray[B->j[jb]]; 1222 } 1223 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1224 ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 1225 ierr = PetscFree(colidxs);CHKERRQ(ierr); 1226 1227 /* fill in and store nonzero values */ 1228 ierr = MatSeqAIJGetArrayRead(aij->A,&aa);CHKERRQ(ierr); 1229 ierr = MatSeqAIJGetArrayRead(aij->B,&ba);CHKERRQ(ierr); 1230 ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1231 for (cnt=0, i=0; i<m; i++) { 1232 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1233 if (garray[B->j[jb]] > cs) break; 1234 matvals[cnt++] = ba[jb]; 1235 } 1236 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1237 matvals[cnt++] = aa[ja]; 1238 for (; jb<B->i[i+1]; jb++) 1239 matvals[cnt++] = ba[jb]; 1240 } 1241 ierr = MatSeqAIJRestoreArrayRead(aij->A,&aa);CHKERRQ(ierr); 1242 ierr = MatSeqAIJRestoreArrayRead(aij->B,&ba);CHKERRQ(ierr); 1243 if (cnt != nz) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1244 ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 1245 ierr = PetscFree(matvals);CHKERRQ(ierr); 1246 1247 /* write block size option to the viewer's .info file */ 1248 ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 1249 PetscFunctionReturn(0); 1250 } 1251 1252 #include <petscdraw.h> 1253 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1254 { 1255 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1256 PetscErrorCode ierr; 1257 PetscMPIInt rank = aij->rank,size = aij->size; 1258 PetscBool isdraw,iascii,isbinary; 1259 PetscViewer sviewer; 1260 PetscViewerFormat format; 1261 1262 PetscFunctionBegin; 1263 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1264 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1265 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1266 if (iascii) { 1267 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1268 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1269 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1270 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1271 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1272 for (i=0; i<(PetscInt)size; i++) { 1273 nmax = PetscMax(nmax,nz[i]); 1274 nmin = PetscMin(nmin,nz[i]); 1275 navg += nz[i]; 1276 } 1277 ierr = PetscFree(nz);CHKERRQ(ierr); 1278 navg = navg/size; 1279 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1280 PetscFunctionReturn(0); 1281 } 1282 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1283 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1284 
MatInfo info; 1285 PetscBool inodes; 1286 1287 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr); 1288 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1289 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1290 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1291 if (!inodes) { 1292 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1293 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1294 } else { 1295 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1296 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1297 } 1298 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1299 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1300 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1301 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1302 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1303 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1304 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1305 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1306 PetscFunctionReturn(0); 1307 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1308 PetscInt inodecount,inodelimit,*inodes; 1309 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1310 if (inodes) { 1311 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1312 } else { 1313 ierr = 
PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1314 } 1315 PetscFunctionReturn(0); 1316 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1317 PetscFunctionReturn(0); 1318 } 1319 } else if (isbinary) { 1320 if (size == 1) { 1321 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1322 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1323 } else { 1324 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1325 } 1326 PetscFunctionReturn(0); 1327 } else if (iascii && size == 1) { 1328 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1329 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1330 PetscFunctionReturn(0); 1331 } else if (isdraw) { 1332 PetscDraw draw; 1333 PetscBool isnull; 1334 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1335 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1336 if (isnull) PetscFunctionReturn(0); 1337 } 1338 1339 { /* assemble the entire matrix onto first processor */ 1340 Mat A = NULL, Av; 1341 IS isrow,iscol; 1342 1343 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1344 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1345 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1346 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1347 /* The commented code uses MatCreateSubMatrices instead */ 1348 /* 1349 Mat *AA, A = NULL, Av; 1350 IS isrow,iscol; 1351 1352 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1353 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? 
mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1354 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1355 if (!rank) { 1356 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1357 A = AA[0]; 1358 Av = AA[0]; 1359 } 1360 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1361 */ 1362 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1363 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1364 /* 1365 Everyone has to call to draw the matrix since the graphics waits are 1366 synchronized across all processors that share the PetscDraw object 1367 */ 1368 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1369 if (!rank) { 1370 if (((PetscObject)mat)->name) { 1371 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1372 } 1373 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1374 } 1375 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1376 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1377 ierr = MatDestroy(&A);CHKERRQ(ierr); 1378 } 1379 PetscFunctionReturn(0); 1380 } 1381 1382 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1383 { 1384 PetscErrorCode ierr; 1385 PetscBool iascii,isdraw,issocket,isbinary; 1386 1387 PetscFunctionBegin; 1388 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1389 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1390 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1391 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1392 if (iascii || isdraw || isbinary || issocket) { 1393 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1394 } 1395 PetscFunctionReturn(0); 1396 } 1397 1398 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1399 { 1400 Mat_MPIAIJ 
*mat = (Mat_MPIAIJ*)matin->data; 1401 PetscErrorCode ierr; 1402 Vec bb1 = NULL; 1403 PetscBool hasop; 1404 1405 PetscFunctionBegin; 1406 if (flag == SOR_APPLY_UPPER) { 1407 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1408 PetscFunctionReturn(0); 1409 } 1410 1411 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1412 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1413 } 1414 1415 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1416 if (flag & SOR_ZERO_INITIAL_GUESS) { 1417 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1418 its--; 1419 } 1420 1421 while (its--) { 1422 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1423 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1424 1425 /* update rhs: bb1 = bb - B*x */ 1426 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1427 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1428 1429 /* local sweep */ 1430 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1431 } 1432 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1433 if (flag & SOR_ZERO_INITIAL_GUESS) { 1434 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1435 its--; 1436 } 1437 while (its--) { 1438 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1439 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1440 1441 /* update rhs: bb1 = bb - B*x */ 1442 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1443 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1444 1445 /* local sweep */ 1446 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1447 } 1448 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1449 if (flag & SOR_ZERO_INITIAL_GUESS) { 
1450 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1451 its--; 1452 } 1453 while (its--) { 1454 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1455 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1456 1457 /* update rhs: bb1 = bb - B*x */ 1458 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1459 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1460 1461 /* local sweep */ 1462 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1463 } 1464 } else if (flag & SOR_EISENSTAT) { 1465 Vec xx1; 1466 1467 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1468 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1469 1470 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1471 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1472 if (!mat->diag) { 1473 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1474 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1475 } 1476 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1477 if (hasop) { 1478 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1479 } else { 1480 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1481 } 1482 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1483 1484 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1485 1486 /* local sweep */ 1487 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1488 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1489 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1490 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1491 1492 ierr = 
VecDestroy(&bb1);CHKERRQ(ierr); 1493 1494 matin->factorerrortype = mat->A->factorerrortype; 1495 PetscFunctionReturn(0); 1496 } 1497 1498 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1499 { 1500 Mat aA,aB,Aperm; 1501 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1502 PetscScalar *aa,*ba; 1503 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1504 PetscSF rowsf,sf; 1505 IS parcolp = NULL; 1506 PetscBool done; 1507 PetscErrorCode ierr; 1508 1509 PetscFunctionBegin; 1510 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1511 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1512 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1513 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1514 1515 /* Invert row permutation to find out where my rows should go */ 1516 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1517 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1518 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1519 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1520 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1521 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1522 1523 /* Invert column permutation to find out where my columns should go */ 1524 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1525 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1526 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1527 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1528 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1529 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1530 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1531 1532 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1533 ierr = 
ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1534 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1535 1536 /* Find out where my gcols should go */ 1537 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1538 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1539 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1540 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1541 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1542 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1543 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1544 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1545 1546 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1547 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1548 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1549 for (i=0; i<m; i++) { 1550 PetscInt row = rdest[i]; 1551 PetscMPIInt rowner; 1552 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1553 for (j=ai[i]; j<ai[i+1]; j++) { 1554 PetscInt col = cdest[aj[j]]; 1555 PetscMPIInt cowner; 1556 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1557 if (rowner == cowner) dnnz[i]++; 1558 else onnz[i]++; 1559 } 1560 for (j=bi[i]; j<bi[i+1]; j++) { 1561 PetscInt col = gcdest[bj[j]]; 1562 PetscMPIInt cowner; 1563 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1564 if (rowner == cowner) dnnz[i]++; 1565 else onnz[i]++; 1566 } 1567 } 1568 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1569 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1570 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1571 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1572 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1573 1574 ierr = 
MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1575 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1576 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1577 for (i=0; i<m; i++) { 1578 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1579 PetscInt j0,rowlen; 1580 rowlen = ai[i+1] - ai[i]; 1581 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1582 for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1583 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1584 } 1585 rowlen = bi[i+1] - bi[i]; 1586 for (j0=j=0; j<rowlen; j0=j) { 1587 for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1588 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1589 } 1590 } 1591 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1592 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1593 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1594 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1595 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1596 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1597 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1598 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1599 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1600 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1601 *B = Aperm; 1602 PetscFunctionReturn(0); 1603 } 1604 1605 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1606 { 1607 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1608 PetscErrorCode ierr; 1609 1610 PetscFunctionBegin; 1611 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1612 if (ghosts) *ghosts = aij->garray; 1613 PetscFunctionReturn(0); 1614 } 1615 1616 
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1617 { 1618 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1619 Mat A = mat->A,B = mat->B; 1620 PetscErrorCode ierr; 1621 PetscLogDouble isend[5],irecv[5]; 1622 1623 PetscFunctionBegin; 1624 info->block_size = 1.0; 1625 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1626 1627 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1628 isend[3] = info->memory; isend[4] = info->mallocs; 1629 1630 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1631 1632 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1633 isend[3] += info->memory; isend[4] += info->mallocs; 1634 if (flag == MAT_LOCAL) { 1635 info->nz_used = isend[0]; 1636 info->nz_allocated = isend[1]; 1637 info->nz_unneeded = isend[2]; 1638 info->memory = isend[3]; 1639 info->mallocs = isend[4]; 1640 } else if (flag == MAT_GLOBAL_MAX) { 1641 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1642 1643 info->nz_used = irecv[0]; 1644 info->nz_allocated = irecv[1]; 1645 info->nz_unneeded = irecv[2]; 1646 info->memory = irecv[3]; 1647 info->mallocs = irecv[4]; 1648 } else if (flag == MAT_GLOBAL_SUM) { 1649 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1650 1651 info->nz_used = irecv[0]; 1652 info->nz_allocated = irecv[1]; 1653 info->nz_unneeded = irecv[2]; 1654 info->memory = irecv[3]; 1655 info->mallocs = irecv[4]; 1656 } 1657 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1658 info->fill_ratio_needed = 0; 1659 info->factor_mallocs = 0; 1660 PetscFunctionReturn(0); 1661 } 1662 1663 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1664 { 1665 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1666 PetscErrorCode ierr; 1667 1668 PetscFunctionBegin; 1669 switch (op) { 1670 case MAT_NEW_NONZERO_LOCATIONS: 1671 case 
MAT_NEW_NONZERO_ALLOCATION_ERR: 1672 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1673 case MAT_KEEP_NONZERO_PATTERN: 1674 case MAT_NEW_NONZERO_LOCATION_ERR: 1675 case MAT_USE_INODES: 1676 case MAT_IGNORE_ZERO_ENTRIES: 1677 MatCheckPreallocated(A,1); 1678 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1679 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1680 break; 1681 case MAT_ROW_ORIENTED: 1682 MatCheckPreallocated(A,1); 1683 a->roworiented = flg; 1684 1685 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1686 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1687 break; 1688 case MAT_FORCE_DIAGONAL_ENTRIES: 1689 case MAT_SORTED_FULL: 1690 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1691 break; 1692 case MAT_IGNORE_OFF_PROC_ENTRIES: 1693 a->donotstash = flg; 1694 break; 1695 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1696 case MAT_SPD: 1697 case MAT_SYMMETRIC: 1698 case MAT_STRUCTURALLY_SYMMETRIC: 1699 case MAT_HERMITIAN: 1700 case MAT_SYMMETRY_ETERNAL: 1701 break; 1702 case MAT_SUBMAT_SINGLEIS: 1703 A->submat_singleis = flg; 1704 break; 1705 case MAT_STRUCTURE_ONLY: 1706 /* The option is handled directly by MatSetOption() */ 1707 break; 1708 default: 1709 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1710 } 1711 PetscFunctionReturn(0); 1712 } 1713 1714 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1715 { 1716 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1717 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1718 PetscErrorCode ierr; 1719 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1720 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1721 PetscInt *cmap,*idx_p; 1722 1723 PetscFunctionBegin; 1724 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1725 mat->getrowactive = PETSC_TRUE; 1726 1727 if (!mat->rowvalues && (idx || v)) 
{ 1728 /* 1729 allocate enough space to hold information from the longest row. 1730 */ 1731 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1732 PetscInt max = 1,tmp; 1733 for (i=0; i<matin->rmap->n; i++) { 1734 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1735 if (max < tmp) max = tmp; 1736 } 1737 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1738 } 1739 1740 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1741 lrow = row - rstart; 1742 1743 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1744 if (!v) {pvA = NULL; pvB = NULL;} 1745 if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1746 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1747 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1748 nztot = nzA + nzB; 1749 1750 cmap = mat->garray; 1751 if (v || idx) { 1752 if (nztot) { 1753 /* Sort by increasing column numbers, assuming A and B already sorted */ 1754 PetscInt imark = -1; 1755 if (v) { 1756 *v = v_p = mat->rowvalues; 1757 for (i=0; i<nzB; i++) { 1758 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1759 else break; 1760 } 1761 imark = i; 1762 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1763 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1764 } 1765 if (idx) { 1766 *idx = idx_p = mat->rowindices; 1767 if (imark > -1) { 1768 for (i=0; i<imark; i++) { 1769 idx_p[i] = cmap[cworkB[i]]; 1770 } 1771 } else { 1772 for (i=0; i<nzB; i++) { 1773 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1774 else break; 1775 } 1776 imark = i; 1777 } 1778 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1779 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1780 } 1781 } else { 1782 if (idx) *idx = NULL; 1783 if (v) *v = NULL; 1784 } 1785 } 1786 *nz = nztot; 1787 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1788 ierr = 
(*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1789 PetscFunctionReturn(0); 1790 } 1791 1792 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1793 { 1794 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1795 1796 PetscFunctionBegin; 1797 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1798 aij->getrowactive = PETSC_FALSE; 1799 PetscFunctionReturn(0); 1800 } 1801 1802 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1803 { 1804 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1805 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1806 PetscErrorCode ierr; 1807 PetscInt i,j,cstart = mat->cmap->rstart; 1808 PetscReal sum = 0.0; 1809 MatScalar *v; 1810 1811 PetscFunctionBegin; 1812 if (aij->size == 1) { 1813 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1814 } else { 1815 if (type == NORM_FROBENIUS) { 1816 v = amat->a; 1817 for (i=0; i<amat->nz; i++) { 1818 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1819 } 1820 v = bmat->a; 1821 for (i=0; i<bmat->nz; i++) { 1822 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1823 } 1824 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1825 *norm = PetscSqrtReal(*norm); 1826 ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr); 1827 } else if (type == NORM_1) { /* max column norm */ 1828 PetscReal *tmp,*tmp2; 1829 PetscInt *jj,*garray = aij->garray; 1830 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1831 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1832 *norm = 0.0; 1833 v = amat->a; jj = amat->j; 1834 for (j=0; j<amat->nz; j++) { 1835 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1836 } 1837 v = bmat->a; jj = bmat->j; 1838 for (j=0; j<bmat->nz; j++) { 1839 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1840 } 1841 ierr = 
MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1842 for (j=0; j<mat->cmap->N; j++) { 1843 if (tmp2[j] > *norm) *norm = tmp2[j]; 1844 } 1845 ierr = PetscFree(tmp);CHKERRQ(ierr); 1846 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1847 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1848 } else if (type == NORM_INFINITY) { /* max row norm */ 1849 PetscReal ntemp = 0.0; 1850 for (j=0; j<aij->A->rmap->n; j++) { 1851 v = amat->a + amat->i[j]; 1852 sum = 0.0; 1853 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1854 sum += PetscAbsScalar(*v); v++; 1855 } 1856 v = bmat->a + bmat->i[j]; 1857 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1858 sum += PetscAbsScalar(*v); v++; 1859 } 1860 if (sum > ntemp) ntemp = sum; 1861 } 1862 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1863 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1864 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1865 } 1866 PetscFunctionReturn(0); 1867 } 1868 1869 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1870 { 1871 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 1872 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 1873 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 1874 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 1875 PetscErrorCode ierr; 1876 Mat B,A_diag,*B_diag; 1877 const MatScalar *pbv,*bv; 1878 1879 PetscFunctionBegin; 1880 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1881 ai = Aloc->i; aj = Aloc->j; 1882 bi = Bloc->i; bj = Bloc->j; 1883 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1884 PetscInt *d_nnz,*g_nnz,*o_nnz; 1885 PetscSFNode *oloc; 1886 PETSC_UNUSED PetscSF sf; 1887 1888 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1889 /* compute d_nnz 
for preallocation */ 1890 ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr); 1891 for (i=0; i<ai[ma]; i++) { 1892 d_nnz[aj[i]]++; 1893 } 1894 /* compute local off-diagonal contributions */ 1895 ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr); 1896 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1897 /* map those to global */ 1898 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1899 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1900 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1901 ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr); 1902 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1903 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1904 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1905 1906 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1907 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1908 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1909 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1910 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1911 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1912 } else { 1913 B = *matout; 1914 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1915 } 1916 1917 b = (Mat_MPIAIJ*)B->data; 1918 A_diag = a->A; 1919 B_diag = &b->A; 1920 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 1921 A_diag_ncol = A_diag->cmap->N; 1922 B_diag_ilen = sub_B_diag->ilen; 1923 B_diag_i = sub_B_diag->i; 1924 1925 /* Set ilen for diagonal of B */ 1926 for (i=0; i<A_diag_ncol; i++) { 1927 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 1928 } 1929 1930 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 1931 very quickly (=without using MatSetValues), because all writes are local. 
*/ 1932 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 1933 1934 /* copy over the B part */ 1935 ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr); 1936 ierr = MatSeqAIJGetArrayRead(a->B,&bv);CHKERRQ(ierr); 1937 pbv = bv; 1938 row = A->rmap->rstart; 1939 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1940 cols_tmp = cols; 1941 for (i=0; i<mb; i++) { 1942 ncol = bi[i+1]-bi[i]; 1943 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES);CHKERRQ(ierr); 1944 row++; 1945 pbv += ncol; cols_tmp += ncol; 1946 } 1947 ierr = PetscFree(cols);CHKERRQ(ierr); 1948 ierr = MatSeqAIJRestoreArrayRead(a->B,&bv);CHKERRQ(ierr); 1949 1950 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1951 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1952 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1953 *matout = B; 1954 } else { 1955 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 1956 } 1957 PetscFunctionReturn(0); 1958 } 1959 1960 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1961 { 1962 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1963 Mat a = aij->A,b = aij->B; 1964 PetscErrorCode ierr; 1965 PetscInt s1,s2,s3; 1966 1967 PetscFunctionBegin; 1968 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1969 if (rr) { 1970 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1971 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1972 /* Overlap communication with computation. 
*/ 1973 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1974 } 1975 if (ll) { 1976 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1977 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1978 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1979 } 1980 /* scale the diagonal block */ 1981 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1982 1983 if (rr) { 1984 /* Do a scatter end and then right scale the off-diagonal block */ 1985 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1986 ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr); 1987 } 1988 PetscFunctionReturn(0); 1989 } 1990 1991 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1992 { 1993 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1994 PetscErrorCode ierr; 1995 1996 PetscFunctionBegin; 1997 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1998 PetscFunctionReturn(0); 1999 } 2000 2001 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2002 { 2003 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2004 Mat a,b,c,d; 2005 PetscBool flg; 2006 PetscErrorCode ierr; 2007 2008 PetscFunctionBegin; 2009 a = matA->A; b = matA->B; 2010 c = matB->A; d = matB->B; 2011 2012 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2013 if (flg) { 2014 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2015 } 2016 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2017 PetscFunctionReturn(0); 2018 } 2019 2020 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2021 { 2022 PetscErrorCode ierr; 2023 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2024 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2025 2026 PetscFunctionBegin; 2027 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 2028 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2029 /* because of the column compression in the off-processor part of the matrix a->B, 2030 the number of columns in a->B and b->B may be different, hence we cannot call 2031 the MatCopy() directly on the two parts. If need be, we can provide a more 2032 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2033 then copying the submatrices */ 2034 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2035 } else { 2036 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2037 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2038 } 2039 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2040 PetscFunctionReturn(0); 2041 } 2042 2043 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2044 { 2045 PetscErrorCode ierr; 2046 2047 PetscFunctionBegin; 2048 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr); 2049 PetscFunctionReturn(0); 2050 } 2051 2052 /* 2053 Computes the number of nonzeros per row needed for preallocation when X and Y 2054 have different nonzero structure. 
2055 */ 2056 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2057 { 2058 PetscInt i,j,k,nzx,nzy; 2059 2060 PetscFunctionBegin; 2061 /* Set the number of nonzeros in the new matrix */ 2062 for (i=0; i<m; i++) { 2063 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2064 nzx = xi[i+1] - xi[i]; 2065 nzy = yi[i+1] - yi[i]; 2066 nnz[i] = 0; 2067 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2068 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2069 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2070 nnz[i]++; 2071 } 2072 for (; k<nzy; k++) nnz[i]++; 2073 } 2074 PetscFunctionReturn(0); 2075 } 2076 2077 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2078 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2079 { 2080 PetscErrorCode ierr; 2081 PetscInt m = Y->rmap->N; 2082 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2083 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2084 2085 PetscFunctionBegin; 2086 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2087 PetscFunctionReturn(0); 2088 } 2089 2090 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2091 { 2092 PetscErrorCode ierr; 2093 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2094 2095 PetscFunctionBegin; 2096 if (str == SAME_NONZERO_PATTERN) { 2097 ierr = MatAXPY(yy->A,a,xx->A,str);CHKERRQ(ierr); 2098 ierr = MatAXPY(yy->B,a,xx->B,str);CHKERRQ(ierr); 2099 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2100 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2101 } else { 2102 Mat B; 2103 PetscInt *nnz_d,*nnz_o; 2104 2105 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2106 ierr = 
PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2107 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2108 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2109 ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr); 2110 ierr = MatSetType(B,((PetscObject)Y)->type_name);CHKERRQ(ierr); 2111 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2112 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2113 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2114 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2115 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2116 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2117 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2118 } 2119 PetscFunctionReturn(0); 2120 } 2121 2122 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2123 2124 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2125 { 2126 #if defined(PETSC_USE_COMPLEX) 2127 PetscErrorCode ierr; 2128 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2129 2130 PetscFunctionBegin; 2131 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2132 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2133 #else 2134 PetscFunctionBegin; 2135 #endif 2136 PetscFunctionReturn(0); 2137 } 2138 2139 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2140 { 2141 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2142 PetscErrorCode ierr; 2143 2144 PetscFunctionBegin; 2145 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2146 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2147 PetscFunctionReturn(0); 2148 } 2149 2150 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2151 { 2152 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2153 PetscErrorCode ierr; 2154 2155 PetscFunctionBegin; 2156 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2157 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2158 PetscFunctionReturn(0); 2159 } 2160 2161 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2162 { 2163 Mat_MPIAIJ *a = 
(Mat_MPIAIJ*)A->data; 2164 PetscErrorCode ierr; 2165 PetscInt i,*idxb = NULL,m = A->rmap->n; 2166 PetscScalar *va,*vv; 2167 Vec vB,vA; 2168 const PetscScalar *vb; 2169 2170 PetscFunctionBegin; 2171 ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr); 2172 ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr); 2173 2174 ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr); 2175 if (idx) { 2176 for (i=0; i<m; i++) { 2177 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2178 } 2179 } 2180 2181 ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr); 2182 ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr); 2183 ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr); 2184 2185 ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr); 2186 ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr); 2187 for (i=0; i<m; i++) { 2188 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2189 vv[i] = vb[i]; 2190 if (idx) idx[i] = a->garray[idxb[i]]; 2191 } else { 2192 vv[i] = va[i]; 2193 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) 2194 idx[i] = a->garray[idxb[i]]; 2195 } 2196 } 2197 ierr = VecRestoreArrayWrite(vA,&vv);CHKERRQ(ierr); 2198 ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr); 2199 ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr); 2200 ierr = PetscFree(idxb);CHKERRQ(ierr); 2201 ierr = VecDestroy(&vA);CHKERRQ(ierr); 2202 ierr = VecDestroy(&vB);CHKERRQ(ierr); 2203 PetscFunctionReturn(0); 2204 } 2205 2206 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2207 { 2208 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2209 PetscInt m = A->rmap->n,n = A->cmap->n; 2210 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2211 PetscInt *cmap = mat->garray; 2212 PetscInt *diagIdx, *offdiagIdx; 2213 Vec diagV, offdiagV; 2214 PetscScalar *a, *diagA, *offdiagA; 2215 const PetscScalar *ba,*bav; 2216 PetscInt r,j,col,ncols,*bi,*bj; 2217 PetscErrorCode ierr; 2218 Mat B = mat->B; 2219 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2220 2221 
PetscFunctionBegin; 2222 /* When a process holds entire A and other processes have no entry */ 2223 if (A->cmap->N == n) { 2224 ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2225 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2226 ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr); 2227 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2228 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2229 PetscFunctionReturn(0); 2230 } else if (n == 0) { 2231 if (m) { 2232 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2233 for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;} 2234 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2235 } 2236 PetscFunctionReturn(0); 2237 } 2238 2239 ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2240 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2241 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2242 ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2243 2244 /* Get offdiagIdx[] for implicit 0.0 */ 2245 ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr); 2246 ba = bav; 2247 bi = b->i; 2248 bj = b->j; 2249 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2250 for (r = 0; r < m; r++) { 2251 ncols = bi[r+1] - bi[r]; 2252 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2253 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2254 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2255 offdiagA[r] = 0.0; 2256 2257 /* Find first hole in the cmap */ 2258 for (j=0; j<ncols; j++) { 2259 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2260 if (col > j && j < cstart) { 2261 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2262 break; 2263 } else if (col > j + n && j >= cstart) { 2264 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2265 break; 2266 } 2267 } 2268 if (j == ncols && ncols < A->cmap->N - n) { 2269 /* a hole is outside compressed Bcols */ 2270 if (ncols == 
0) { 2271 if (cstart) { 2272 offdiagIdx[r] = 0; 2273 } else offdiagIdx[r] = cend; 2274 } else { /* ncols > 0 */ 2275 offdiagIdx[r] = cmap[ncols-1] + 1; 2276 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2277 } 2278 } 2279 } 2280 2281 for (j=0; j<ncols; j++) { 2282 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2283 ba++; bj++; 2284 } 2285 } 2286 2287 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2288 ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2289 for (r = 0; r < m; ++r) { 2290 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2291 a[r] = diagA[r]; 2292 if (idx) idx[r] = cstart + diagIdx[r]; 2293 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2294 a[r] = diagA[r]; 2295 if (idx) { 2296 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2297 idx[r] = cstart + diagIdx[r]; 2298 } else idx[r] = offdiagIdx[r]; 2299 } 2300 } else { 2301 a[r] = offdiagA[r]; 2302 if (idx) idx[r] = offdiagIdx[r]; 2303 } 2304 } 2305 ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr); 2306 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2307 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2308 ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2309 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2310 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2311 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2312 PetscFunctionReturn(0); 2313 } 2314 2315 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2316 { 2317 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2318 PetscInt m = A->rmap->n,n = A->cmap->n; 2319 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2320 PetscInt *cmap = mat->garray; 2321 PetscInt *diagIdx, *offdiagIdx; 2322 Vec diagV, offdiagV; 2323 PetscScalar *a, *diagA, *offdiagA; 2324 const PetscScalar *ba,*bav; 2325 PetscInt r,j,col,ncols,*bi,*bj; 2326 PetscErrorCode ierr; 2327 Mat B = mat->B; 2328 Mat_SeqAIJ *b = 
(Mat_SeqAIJ*)B->data; 2329 2330 PetscFunctionBegin; 2331 /* When a process holds entire A and other processes have no entry */ 2332 if (A->cmap->N == n) { 2333 ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2334 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2335 ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr); 2336 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2337 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2338 PetscFunctionReturn(0); 2339 } else if (n == 0) { 2340 if (m) { 2341 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2342 for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;} 2343 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2344 } 2345 PetscFunctionReturn(0); 2346 } 2347 2348 ierr = PetscCalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2349 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2350 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2351 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2352 2353 /* Get offdiagIdx[] for implicit 0.0 */ 2354 ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr); 2355 ba = bav; 2356 bi = b->i; 2357 bj = b->j; 2358 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2359 for (r = 0; r < m; r++) { 2360 ncols = bi[r+1] - bi[r]; 2361 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2362 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2363 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2364 offdiagA[r] = 0.0; 2365 2366 /* Find first hole in the cmap */ 2367 for (j=0; j<ncols; j++) { 2368 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2369 if (col > j && j < cstart) { 2370 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2371 break; 2372 } else if (col > j + n && j >= cstart) { 2373 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2374 break; 2375 } 2376 } 2377 if (j == ncols && ncols < A->cmap->N - n) { 2378 /* a hole is outside 
compressed Bcols */ 2379 if (ncols == 0) { 2380 if (cstart) { 2381 offdiagIdx[r] = 0; 2382 } else offdiagIdx[r] = cend; 2383 } else { /* ncols > 0 */ 2384 offdiagIdx[r] = cmap[ncols-1] + 1; 2385 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2386 } 2387 } 2388 } 2389 2390 for (j=0; j<ncols; j++) { 2391 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2392 ba++; bj++; 2393 } 2394 } 2395 2396 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2397 ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2398 for (r = 0; r < m; ++r) { 2399 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2400 a[r] = diagA[r]; 2401 if (idx) idx[r] = cstart + diagIdx[r]; 2402 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2403 a[r] = diagA[r]; 2404 if (idx) { 2405 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2406 idx[r] = cstart + diagIdx[r]; 2407 } else idx[r] = offdiagIdx[r]; 2408 } 2409 } else { 2410 a[r] = offdiagA[r]; 2411 if (idx) idx[r] = offdiagIdx[r]; 2412 } 2413 } 2414 ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr); 2415 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2416 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2417 ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2418 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2419 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2420 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2421 PetscFunctionReturn(0); 2422 } 2423 2424 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2425 { 2426 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 2427 PetscInt m = A->rmap->n,n = A->cmap->n; 2428 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2429 PetscInt *cmap = mat->garray; 2430 PetscInt *diagIdx, *offdiagIdx; 2431 Vec diagV, offdiagV; 2432 PetscScalar *a, *diagA, *offdiagA; 2433 const PetscScalar *ba,*bav; 2434 PetscInt r,j,col,ncols,*bi,*bj; 2435 PetscErrorCode ierr; 2436 Mat B = 
mat->B; 2437 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2438 2439 PetscFunctionBegin; 2440 /* When a process holds entire A and other processes have no entry */ 2441 if (A->cmap->N == n) { 2442 ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2443 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2444 ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr); 2445 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2446 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2447 PetscFunctionReturn(0); 2448 } else if (n == 0) { 2449 if (m) { 2450 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2451 for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;} 2452 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2453 } 2454 PetscFunctionReturn(0); 2455 } 2456 2457 ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2458 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2459 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2460 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2461 2462 /* Get offdiagIdx[] for implicit 0.0 */ 2463 ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr); 2464 ba = bav; 2465 bi = b->i; 2466 bj = b->j; 2467 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2468 for (r = 0; r < m; r++) { 2469 ncols = bi[r+1] - bi[r]; 2470 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2471 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2472 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2473 offdiagA[r] = 0.0; 2474 2475 /* Find first hole in the cmap */ 2476 for (j=0; j<ncols; j++) { 2477 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2478 if (col > j && j < cstart) { 2479 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2480 break; 2481 } else if (col > j + n && j >= cstart) { 2482 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2483 break; 2484 } 2485 } 2486 if (j == ncols && ncols < A->cmap->N - n) 
{ 2487 /* a hole is outside compressed Bcols */ 2488 if (ncols == 0) { 2489 if (cstart) { 2490 offdiagIdx[r] = 0; 2491 } else offdiagIdx[r] = cend; 2492 } else { /* ncols > 0 */ 2493 offdiagIdx[r] = cmap[ncols-1] + 1; 2494 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2495 } 2496 } 2497 } 2498 2499 for (j=0; j<ncols; j++) { 2500 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2501 ba++; bj++; 2502 } 2503 } 2504 2505 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2506 ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr); 2507 for (r = 0; r < m; ++r) { 2508 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2509 a[r] = diagA[r]; 2510 if (idx) idx[r] = cstart + diagIdx[r]; 2511 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2512 a[r] = diagA[r]; 2513 if (idx) { 2514 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2515 idx[r] = cstart + diagIdx[r]; 2516 } else idx[r] = offdiagIdx[r]; 2517 } 2518 } else { 2519 a[r] = offdiagA[r]; 2520 if (idx) idx[r] = offdiagIdx[r]; 2521 } 2522 } 2523 ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr); 2524 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2525 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2526 ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr); 2527 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2528 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2529 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2530 PetscFunctionReturn(0); 2531 } 2532 2533 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2534 { 2535 PetscErrorCode ierr; 2536 Mat *dummy; 2537 2538 PetscFunctionBegin; 2539 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2540 *newmat = *dummy; 2541 ierr = PetscFree(dummy);CHKERRQ(ierr); 2542 PetscFunctionReturn(0); 2543 } 2544 2545 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar 
**values) 2546 { 2547 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2548 PetscErrorCode ierr; 2549 2550 PetscFunctionBegin; 2551 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2552 A->factorerrortype = a->A->factorerrortype; 2553 PetscFunctionReturn(0); 2554 } 2555 2556 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2557 { 2558 PetscErrorCode ierr; 2559 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2560 2561 PetscFunctionBegin; 2562 if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2563 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2564 if (x->assembled) { 2565 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2566 } else { 2567 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2568 } 2569 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2570 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2571 PetscFunctionReturn(0); 2572 } 2573 2574 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2575 { 2576 PetscFunctionBegin; 2577 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2578 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2579 PetscFunctionReturn(0); 2580 } 2581 2582 /*@ 2583 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2584 2585 Collective on Mat 2586 2587 Input Parameters: 2588 + A - the matrix 2589 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2590 2591 Level: advanced 2592 2593 @*/ 2594 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2595 { 2596 PetscErrorCode ierr; 2597 2598 PetscFunctionBegin; 2599 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2600 
PetscFunctionReturn(0); 2601 } 2602 2603 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2604 { 2605 PetscErrorCode ierr; 2606 PetscBool sc = PETSC_FALSE,flg; 2607 2608 PetscFunctionBegin; 2609 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2610 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2611 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2612 if (flg) { 2613 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2614 } 2615 ierr = PetscOptionsTail();CHKERRQ(ierr); 2616 PetscFunctionReturn(0); 2617 } 2618 2619 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2620 { 2621 PetscErrorCode ierr; 2622 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2623 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2624 2625 PetscFunctionBegin; 2626 if (!Y->preallocated) { 2627 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2628 } else if (!aij->nz) { 2629 PetscInt nonew = aij->nonew; 2630 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2631 aij->nonew = nonew; 2632 } 2633 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2634 PetscFunctionReturn(0); 2635 } 2636 2637 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2638 { 2639 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2640 PetscErrorCode ierr; 2641 2642 PetscFunctionBegin; 2643 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2644 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2645 if (d) { 2646 PetscInt rstart; 2647 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2648 *d += rstart; 2649 2650 } 2651 PetscFunctionReturn(0); 2652 } 2653 2654 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2655 { 2656 Mat_MPIAIJ *a = 
(Mat_MPIAIJ*)A->data; 2657 PetscErrorCode ierr; 2658 2659 PetscFunctionBegin; 2660 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2661 PetscFunctionReturn(0); 2662 } 2663 2664 /* -------------------------------------------------------------------*/ 2665 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2666 MatGetRow_MPIAIJ, 2667 MatRestoreRow_MPIAIJ, 2668 MatMult_MPIAIJ, 2669 /* 4*/ MatMultAdd_MPIAIJ, 2670 MatMultTranspose_MPIAIJ, 2671 MatMultTransposeAdd_MPIAIJ, 2672 NULL, 2673 NULL, 2674 NULL, 2675 /*10*/ NULL, 2676 NULL, 2677 NULL, 2678 MatSOR_MPIAIJ, 2679 MatTranspose_MPIAIJ, 2680 /*15*/ MatGetInfo_MPIAIJ, 2681 MatEqual_MPIAIJ, 2682 MatGetDiagonal_MPIAIJ, 2683 MatDiagonalScale_MPIAIJ, 2684 MatNorm_MPIAIJ, 2685 /*20*/ MatAssemblyBegin_MPIAIJ, 2686 MatAssemblyEnd_MPIAIJ, 2687 MatSetOption_MPIAIJ, 2688 MatZeroEntries_MPIAIJ, 2689 /*24*/ MatZeroRows_MPIAIJ, 2690 NULL, 2691 NULL, 2692 NULL, 2693 NULL, 2694 /*29*/ MatSetUp_MPIAIJ, 2695 NULL, 2696 NULL, 2697 MatGetDiagonalBlock_MPIAIJ, 2698 NULL, 2699 /*34*/ MatDuplicate_MPIAIJ, 2700 NULL, 2701 NULL, 2702 NULL, 2703 NULL, 2704 /*39*/ MatAXPY_MPIAIJ, 2705 MatCreateSubMatrices_MPIAIJ, 2706 MatIncreaseOverlap_MPIAIJ, 2707 MatGetValues_MPIAIJ, 2708 MatCopy_MPIAIJ, 2709 /*44*/ MatGetRowMax_MPIAIJ, 2710 MatScale_MPIAIJ, 2711 MatShift_MPIAIJ, 2712 MatDiagonalSet_MPIAIJ, 2713 MatZeroRowsColumns_MPIAIJ, 2714 /*49*/ MatSetRandom_MPIAIJ, 2715 NULL, 2716 NULL, 2717 NULL, 2718 NULL, 2719 /*54*/ MatFDColoringCreate_MPIXAIJ, 2720 NULL, 2721 MatSetUnfactored_MPIAIJ, 2722 MatPermute_MPIAIJ, 2723 NULL, 2724 /*59*/ MatCreateSubMatrix_MPIAIJ, 2725 MatDestroy_MPIAIJ, 2726 MatView_MPIAIJ, 2727 NULL, 2728 NULL, 2729 /*64*/ NULL, 2730 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2731 NULL, 2732 NULL, 2733 NULL, 2734 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2735 MatGetRowMinAbs_MPIAIJ, 2736 NULL, 2737 NULL, 2738 NULL, 2739 NULL, 2740 /*75*/ MatFDColoringApply_AIJ, 2741 MatSetFromOptions_MPIAIJ, 2742 NULL, 2743 
NULL, 2744 MatFindZeroDiagonals_MPIAIJ, 2745 /*80*/ NULL, 2746 NULL, 2747 NULL, 2748 /*83*/ MatLoad_MPIAIJ, 2749 MatIsSymmetric_MPIAIJ, 2750 NULL, 2751 NULL, 2752 NULL, 2753 NULL, 2754 /*89*/ NULL, 2755 NULL, 2756 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2757 NULL, 2758 NULL, 2759 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2760 NULL, 2761 NULL, 2762 NULL, 2763 MatBindToCPU_MPIAIJ, 2764 /*99*/ MatProductSetFromOptions_MPIAIJ, 2765 NULL, 2766 NULL, 2767 MatConjugate_MPIAIJ, 2768 NULL, 2769 /*104*/MatSetValuesRow_MPIAIJ, 2770 MatRealPart_MPIAIJ, 2771 MatImaginaryPart_MPIAIJ, 2772 NULL, 2773 NULL, 2774 /*109*/NULL, 2775 NULL, 2776 MatGetRowMin_MPIAIJ, 2777 NULL, 2778 MatMissingDiagonal_MPIAIJ, 2779 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2780 NULL, 2781 MatGetGhosts_MPIAIJ, 2782 NULL, 2783 NULL, 2784 /*119*/MatMultDiagonalBlock_MPIAIJ, 2785 NULL, 2786 NULL, 2787 NULL, 2788 MatGetMultiProcBlock_MPIAIJ, 2789 /*124*/MatFindNonzeroRows_MPIAIJ, 2790 MatGetColumnNorms_MPIAIJ, 2791 MatInvertBlockDiagonal_MPIAIJ, 2792 MatInvertVariableBlockDiagonal_MPIAIJ, 2793 MatCreateSubMatricesMPI_MPIAIJ, 2794 /*129*/NULL, 2795 NULL, 2796 NULL, 2797 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2798 NULL, 2799 /*134*/NULL, 2800 NULL, 2801 NULL, 2802 NULL, 2803 NULL, 2804 /*139*/MatSetBlockSizes_MPIAIJ, 2805 NULL, 2806 NULL, 2807 MatFDColoringSetUp_MPIXAIJ, 2808 MatFindOffBlockDiagonalEntries_MPIAIJ, 2809 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2810 /*145*/NULL, 2811 NULL, 2812 NULL 2813 }; 2814 2815 /* ----------------------------------------------------------------------------------------*/ 2816 2817 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2818 { 2819 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2820 PetscErrorCode ierr; 2821 2822 PetscFunctionBegin; 2823 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2824 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2825 PetscFunctionReturn(0); 2826 } 2827 2828 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2829 { 2830 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2831 
PetscErrorCode ierr; 2832 2833 PetscFunctionBegin; 2834 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2835 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2836 PetscFunctionReturn(0); 2837 } 2838 2839 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2840 { 2841 Mat_MPIAIJ *b; 2842 PetscErrorCode ierr; 2843 PetscMPIInt size; 2844 2845 PetscFunctionBegin; 2846 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2847 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2848 b = (Mat_MPIAIJ*)B->data; 2849 2850 #if defined(PETSC_USE_CTABLE) 2851 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2852 #else 2853 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2854 #endif 2855 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2856 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2857 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2858 2859 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2860 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr); 2861 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2862 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2863 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0);CHKERRQ(ierr); 2864 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2865 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2866 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2867 2868 if (!B->preallocated) { 2869 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2870 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2871 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2872 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2873 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2874 } 2875 2876 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2877 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2878 B->preallocated = PETSC_TRUE; 2879 B->was_assembled = PETSC_FALSE; 2880 B->assembled = PETSC_FALSE; 2881 PetscFunctionReturn(0); 2882 } 2883 2884 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2885 { 2886 Mat_MPIAIJ *b; 2887 PetscErrorCode ierr; 2888 2889 PetscFunctionBegin; 2890 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2891 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2892 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2893 b = (Mat_MPIAIJ*)B->data; 2894 2895 #if defined(PETSC_USE_CTABLE) 2896 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2897 #else 2898 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2899 #endif 2900 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2901 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2902 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2903 2904 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2905 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2906 B->preallocated = PETSC_TRUE; 2907 B->was_assembled = PETSC_FALSE; 2908 B->assembled = PETSC_FALSE; 2909 PetscFunctionReturn(0); 2910 } 2911 2912 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2913 { 2914 Mat mat; 2915 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2916 PetscErrorCode 
ierr; 2917 2918 PetscFunctionBegin; 2919 *newmat = NULL; 2920 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2921 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2922 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2923 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2924 a = (Mat_MPIAIJ*)mat->data; 2925 2926 mat->factortype = matin->factortype; 2927 mat->assembled = matin->assembled; 2928 mat->insertmode = NOT_SET_VALUES; 2929 mat->preallocated = matin->preallocated; 2930 2931 a->size = oldmat->size; 2932 a->rank = oldmat->rank; 2933 a->donotstash = oldmat->donotstash; 2934 a->roworiented = oldmat->roworiented; 2935 a->rowindices = NULL; 2936 a->rowvalues = NULL; 2937 a->getrowactive = PETSC_FALSE; 2938 2939 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2940 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2941 2942 if (oldmat->colmap) { 2943 #if defined(PETSC_USE_CTABLE) 2944 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2945 #else 2946 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2947 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2948 ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr); 2949 #endif 2950 } else a->colmap = NULL; 2951 if (oldmat->garray) { 2952 PetscInt len; 2953 len = oldmat->B->cmap->n; 2954 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2955 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2956 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); } 2957 } else a->garray = NULL; 2958 2959 /* It may happen MatDuplicate is called with a non-assembled matrix 2960 In fact, MatDuplicate only requires the matrix to be preallocated 2961 This may happen inside a DMCreateMatrix_Shell */ 2962 if (oldmat->lvec) { 2963 ierr = 
VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2964 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2965 } 2966 if (oldmat->Mvctx) { 2967 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2968 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2969 } 2970 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2971 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2972 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2973 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2974 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2975 *newmat = mat; 2976 PetscFunctionReturn(0); 2977 } 2978 2979 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2980 { 2981 PetscBool isbinary, ishdf5; 2982 PetscErrorCode ierr; 2983 2984 PetscFunctionBegin; 2985 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2986 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2987 /* force binary viewer to load .info file if it has not yet done so */ 2988 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2989 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2990 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 2991 if (isbinary) { 2992 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 2993 } else if (ishdf5) { 2994 #if defined(PETSC_HAVE_HDF5) 2995 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 2996 #else 2997 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 2998 #endif 2999 } else { 3000 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 3001 } 3002 
PetscFunctionReturn(0); 3003 } 3004 3005 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3006 { 3007 PetscInt header[4],M,N,m,nz,rows,cols,sum,i; 3008 PetscInt *rowidxs,*colidxs; 3009 PetscScalar *matvals; 3010 PetscErrorCode ierr; 3011 3012 PetscFunctionBegin; 3013 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3014 3015 /* read in matrix header */ 3016 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3017 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3018 M = header[1]; N = header[2]; nz = header[3]; 3019 if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 3020 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N); 3021 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 3022 3023 /* set block sizes from the viewer's .info file */ 3024 ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 3025 /* set global sizes if not set already */ 3026 if (mat->rmap->N < 0) mat->rmap->N = M; 3027 if (mat->cmap->N < 0) mat->cmap->N = N; 3028 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 3029 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 3030 3031 /* check if the matrix sizes are correct */ 3032 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3033 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 3034 3035 /* read in row lengths and build row indices */ 3036 ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr); 3037 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 3038 ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr); 3039 rowidxs[0] = 0; 
for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3040 ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr); 3041 if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 3042 /* read in column indices and matrix values */ 3043 ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr); 3044 ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 3045 ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 3046 /* store matrix indices and values */ 3047 ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr); 3048 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 3049 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 3050 PetscFunctionReturn(0); 3051 } 3052 3053 /* Not scalable because of ISAllGather() unless getting all columns. 
*/ 3054 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3055 { 3056 PetscErrorCode ierr; 3057 IS iscol_local; 3058 PetscBool isstride; 3059 PetscMPIInt lisstride=0,gisstride; 3060 3061 PetscFunctionBegin; 3062 /* check if we are grabbing all columns*/ 3063 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3064 3065 if (isstride) { 3066 PetscInt start,len,mstart,mlen; 3067 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3068 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3069 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3070 if (mstart == start && mlen-mstart == len) lisstride = 1; 3071 } 3072 3073 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3074 if (gisstride) { 3075 PetscInt N; 3076 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3077 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr); 3078 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3079 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3080 } else { 3081 PetscInt cbs; 3082 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3083 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3084 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3085 } 3086 3087 *isseq = iscol_local; 3088 PetscFunctionReturn(0); 3089 } 3090 3091 /* 3092 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3093 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3094 3095 Input Parameters: 3096 mat - matrix 3097 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3098 i.e., mat->rstart <= isrow[i] < mat->rend 3099 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3100 i.e., mat->cstart <= iscol[i] < mat->cend 3101 Output Parameter: 3102 isrow_d,iscol_d - sequential row and column 
index sets for retrieving mat->A 3103 iscol_o - sequential column index set for retrieving mat->B 3104 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3105 */ 3106 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3107 { 3108 PetscErrorCode ierr; 3109 Vec x,cmap; 3110 const PetscInt *is_idx; 3111 PetscScalar *xarray,*cmaparray; 3112 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3113 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3114 Mat B=a->B; 3115 Vec lvec=a->lvec,lcmap; 3116 PetscInt i,cstart,cend,Bn=B->cmap->N; 3117 MPI_Comm comm; 3118 VecScatter Mvctx=a->Mvctx; 3119 3120 PetscFunctionBegin; 3121 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3122 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3123 3124 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3125 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3126 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3127 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3128 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3129 3130 /* Get start indices */ 3131 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3132 isstart -= ncols; 3133 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3134 3135 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3136 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3137 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3138 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3139 for (i=0; i<ncols; i++) { 3140 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3141 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3142 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3143 } 3144 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3145 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3146 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3147 3148 /* Get iscol_d */ 3149 
ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3150 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3151 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3152 3153 /* Get isrow_d */ 3154 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3155 rstart = mat->rmap->rstart; 3156 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3157 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3158 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3159 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3160 3161 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3162 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3163 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3164 3165 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3166 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3167 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3168 3169 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3170 3171 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3172 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3173 3174 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3175 /* off-process column indices */ 3176 count = 0; 3177 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3178 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3179 3180 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3181 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3182 for (i=0; i<Bn; i++) { 3183 if (PetscRealPart(xarray[i]) > -1.0) { 3184 idx[count] = i; /* local column index in off-diagonal part B */ 3185 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3186 count++; 3187 } 3188 } 3189 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3190 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3191 3192 ierr = 
ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3193 /* cannot ensure iscol_o has same blocksize as iscol! */ 3194 3195 ierr = PetscFree(idx);CHKERRQ(ierr); 3196 *garray = cmap1; 3197 3198 ierr = VecDestroy(&x);CHKERRQ(ierr); 3199 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3200 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3201 PetscFunctionReturn(0); 3202 } 3203 3204 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3205 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3206 { 3207 PetscErrorCode ierr; 3208 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3209 Mat M = NULL; 3210 MPI_Comm comm; 3211 IS iscol_d,isrow_d,iscol_o; 3212 Mat Asub = NULL,Bsub = NULL; 3213 PetscInt n; 3214 3215 PetscFunctionBegin; 3216 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3217 3218 if (call == MAT_REUSE_MATRIX) { 3219 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3220 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3221 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3222 3223 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3224 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3225 3226 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3227 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3228 3229 /* Update diagonal and off-diagonal portions of submat */ 3230 asub = (Mat_MPIAIJ*)(*submat)->data; 3231 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3232 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3233 if (n) { 
3234 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3235 } 3236 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3237 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3238 3239 } else { /* call == MAT_INITIAL_MATRIX) */ 3240 const PetscInt *garray; 3241 PetscInt BsubN; 3242 3243 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */ 3244 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3245 3246 /* Create local submatrices Asub and Bsub */ 3247 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3248 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3249 3250 /* Create submatrix M */ 3251 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3252 3253 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3254 asub = (Mat_MPIAIJ*)M->data; 3255 3256 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3257 n = asub->B->cmap->N; 3258 if (BsubN > n) { 3259 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3260 const PetscInt *idx; 3261 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3262 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3263 3264 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3265 j = 0; 3266 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3267 for (i=0; i<n; i++) { 3268 if (j >= BsubN) break; 3269 while (subgarray[i] > garray[j]) j++; 3270 3271 if (subgarray[i] == garray[j]) { 3272 idx_new[i] = idx[j++]; 3273 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3274 } 3275 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3276 3277 
ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3278 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3279 3280 } else if (BsubN < n) { 3281 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3282 } 3283 3284 ierr = PetscFree(garray);CHKERRQ(ierr); 3285 *submat = M; 3286 3287 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3288 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3289 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3290 3291 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3292 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3293 3294 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3295 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3296 } 3297 PetscFunctionReturn(0); 3298 } 3299 3300 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3301 { 3302 PetscErrorCode ierr; 3303 IS iscol_local=NULL,isrow_d; 3304 PetscInt csize; 3305 PetscInt n,i,j,start,end; 3306 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3307 MPI_Comm comm; 3308 3309 PetscFunctionBegin; 3310 /* If isrow has same processor distribution as mat, 3311 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3312 if (call == MAT_REUSE_MATRIX) { 3313 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3314 if (isrow_d) { 3315 sameRowDist = PETSC_TRUE; 3316 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3317 } else { 3318 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3319 if (iscol_local) { 3320 sameRowDist = PETSC_TRUE; 3321 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3322 } 3323 } 3324 } else { 3325 /* Check if isrow has same processor distribution as mat */ 3326 
sameDist[0] = PETSC_FALSE; 3327 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3328 if (!n) { 3329 sameDist[0] = PETSC_TRUE; 3330 } else { 3331 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3332 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3333 if (i >= start && j < end) { 3334 sameDist[0] = PETSC_TRUE; 3335 } 3336 } 3337 3338 /* Check if iscol has same processor distribution as mat */ 3339 sameDist[1] = PETSC_FALSE; 3340 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3341 if (!n) { 3342 sameDist[1] = PETSC_TRUE; 3343 } else { 3344 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3345 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3346 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3347 } 3348 3349 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3350 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3351 sameRowDist = tsameDist[0]; 3352 } 3353 3354 if (sameRowDist) { 3355 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3356 /* isrow and iscol have same processor distribution as mat */ 3357 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3358 PetscFunctionReturn(0); 3359 } else { /* sameRowDist */ 3360 /* isrow has same processor distribution as mat */ 3361 if (call == MAT_INITIAL_MATRIX) { 3362 PetscBool sorted; 3363 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3364 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3365 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3366 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3367 3368 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3369 if (sorted) { 3370 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3371 ierr = 
MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3372 PetscFunctionReturn(0); 3373 } 3374 } else { /* call == MAT_REUSE_MATRIX */ 3375 IS iscol_sub; 3376 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3377 if (iscol_sub) { 3378 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3379 PetscFunctionReturn(0); 3380 } 3381 } 3382 } 3383 } 3384 3385 /* General case: iscol -> iscol_local which has global size of iscol */ 3386 if (call == MAT_REUSE_MATRIX) { 3387 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3388 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3389 } else { 3390 if (!iscol_local) { 3391 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3392 } 3393 } 3394 3395 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3396 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3397 3398 if (call == MAT_INITIAL_MATRIX) { 3399 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3400 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3401 } 3402 PetscFunctionReturn(0); 3403 } 3404 3405 /*@C 3406 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3407 and "off-diagonal" part of the matrix in CSR format. 3408 3409 Collective 3410 3411 Input Parameters: 3412 + comm - MPI communicator 3413 . A - "diagonal" portion of matrix 3414 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3415 - garray - global index of B columns 3416 3417 Output Parameter: 3418 . 
mat - the matrix, with input A as its local diagonal matrix 3419 Level: advanced 3420 3421 Notes: 3422 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3423 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3424 3425 .seealso: MatCreateMPIAIJWithSplitArrays() 3426 @*/ 3427 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3428 { 3429 PetscErrorCode ierr; 3430 Mat_MPIAIJ *maij; 3431 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3432 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3433 const PetscScalar *oa; 3434 Mat Bnew; 3435 PetscInt m,n,N; 3436 3437 PetscFunctionBegin; 3438 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3439 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3440 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3441 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3442 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3443 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3444 3445 /* Get global columns of mat */ 3446 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3447 3448 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3449 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3450 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3451 maij = (Mat_MPIAIJ*)(*mat)->data; 3452 3453 (*mat)->preallocated = PETSC_TRUE; 3454 3455 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3456 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3457 3458 /* Set A as diagonal portion of *mat */ 3459 maij->A = A; 3460 3461 nz = oi[m]; 3462 for (i=0; i<nz; i++) { 3463 col = oj[i]; 3464 oj[i] 
= garray[col]; 3465 } 3466 3467 /* Set Bnew as off-diagonal portion of *mat */ 3468 ierr = MatSeqAIJGetArrayRead(B,&oa);CHKERRQ(ierr); 3469 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew);CHKERRQ(ierr); 3470 ierr = MatSeqAIJRestoreArrayRead(B,&oa);CHKERRQ(ierr); 3471 bnew = (Mat_SeqAIJ*)Bnew->data; 3472 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3473 maij->B = Bnew; 3474 3475 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3476 3477 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3478 b->free_a = PETSC_FALSE; 3479 b->free_ij = PETSC_FALSE; 3480 ierr = MatDestroy(&B);CHKERRQ(ierr); 3481 3482 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3483 bnew->free_a = PETSC_TRUE; 3484 bnew->free_ij = PETSC_TRUE; 3485 3486 /* condense columns of maij->B */ 3487 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3488 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3489 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3490 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3491 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3492 PetscFunctionReturn(0); 3493 } 3494 3495 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3496 3497 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3498 { 3499 PetscErrorCode ierr; 3500 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3501 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3502 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3503 Mat M,Msub,B=a->B; 3504 MatScalar *aa; 3505 Mat_SeqAIJ *aij; 3506 PetscInt *garray = a->garray,*colsub,Ncols; 3507 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3508 IS 
iscol_sub,iscmap; 3509 const PetscInt *is_idx,*cmap; 3510 PetscBool allcolumns=PETSC_FALSE; 3511 MPI_Comm comm; 3512 3513 PetscFunctionBegin; 3514 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3515 if (call == MAT_REUSE_MATRIX) { 3516 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3517 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3518 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3519 3520 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3521 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3522 3523 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3524 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3525 3526 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3527 3528 } else { /* call == MAT_INITIAL_MATRIX) */ 3529 PetscBool flg; 3530 3531 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3532 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3533 3534 /* (1) iscol -> nonscalable iscol_local */ 3535 /* Check for special case: each processor gets entire matrix columns */ 3536 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3537 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3538 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3539 if (allcolumns) { 3540 iscol_sub = iscol_local; 3541 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3542 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3543 3544 } else { 3545 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3546 PetscInt *idx,*cmap1,k; 3547 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3548 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3549 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3550 count = 0; 3551 k = 0; 3552 for (i=0; i<Ncols; i++) { 3553 j = is_idx[i]; 3554 if (j >= cstart && j < cend) { 3555 /* diagonal part of mat */ 3556 idx[count] = j; 3557 cmap1[count++] = i; /* column index in submat */ 3558 } else if (Bn) { 3559 /* off-diagonal part of mat */ 3560 if (j == garray[k]) { 3561 idx[count] = j; 3562 cmap1[count++] = i; /* column index in submat */ 3563 } else if (j > garray[k]) { 3564 while (j > garray[k] && k < Bn-1) k++; 3565 if (j == garray[k]) { 3566 idx[count] = j; 3567 cmap1[count++] = i; /* column index in submat */ 3568 } 3569 } 3570 } 3571 } 3572 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3573 3574 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3575 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3576 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3577 3578 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3579 } 3580 3581 /* (3) Create sequential Msub */ 3582 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3583 } 3584 3585 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3586 aij = (Mat_SeqAIJ*)(Msub)->data; 3587 ii = aij->i; 3588 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3589 3590 /* 3591 m - number of local rows 3592 Ncols - number of columns (same on all processors) 3593 rstart - first row in new global matrix generated 3594 */ 3595 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3596 3597 if (call == MAT_INITIAL_MATRIX) { 3598 /* (4) Create parallel newmat */ 3599 PetscMPIInt rank,size; 3600 PetscInt csize; 3601 3602 
ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 3603 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 3604 3605 /* 3606 Determine the number of non-zeros in the diagonal and off-diagonal 3607 portions of the matrix in order to do correct preallocation 3608 */ 3609 3610 /* first get start and end of "diagonal" columns */ 3611 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3612 if (csize == PETSC_DECIDE) { 3613 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3614 if (mglobal == Ncols) { /* square matrix */ 3615 nlocal = m; 3616 } else { 3617 nlocal = Ncols/size + ((Ncols % size) > rank); 3618 } 3619 } else { 3620 nlocal = csize; 3621 } 3622 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3623 rstart = rend - nlocal; 3624 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3625 3626 /* next, compute all the lengths */ 3627 jj = aij->j; 3628 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3629 olens = dlens + m; 3630 for (i=0; i<m; i++) { 3631 jend = ii[i+1] - ii[i]; 3632 olen = 0; 3633 dlen = 0; 3634 for (j=0; j<jend; j++) { 3635 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3636 else dlen++; 3637 jj++; 3638 } 3639 olens[i] = olen; 3640 dlens[i] = dlen; 3641 } 3642 3643 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3644 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3645 3646 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3647 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 3648 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3649 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3650 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3651 ierr = PetscFree(dlens);CHKERRQ(ierr); 3652 3653 } else { /* call == MAT_REUSE_MATRIX */ 3654 M = *newmat; 3655 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3656 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix 
must be same size/layout as request"); 3657 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3658 /* 3659 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3660 rather than the slower MatSetValues(). 3661 */ 3662 M->was_assembled = PETSC_TRUE; 3663 M->assembled = PETSC_FALSE; 3664 } 3665 3666 /* (5) Set values of Msub to *newmat */ 3667 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3668 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3669 3670 jj = aij->j; 3671 ierr = MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr); 3672 for (i=0; i<m; i++) { 3673 row = rstart + i; 3674 nz = ii[i+1] - ii[i]; 3675 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3676 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3677 jj += nz; aa += nz; 3678 } 3679 ierr = MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr); 3680 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3681 3682 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3683 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3684 3685 ierr = PetscFree(colsub);CHKERRQ(ierr); 3686 3687 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3688 if (call == MAT_INITIAL_MATRIX) { 3689 *newmat = M; 3690 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3691 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3692 3693 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3694 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3695 3696 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3697 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3698 3699 if (iscol_local) { 3700 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3701 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3702 } 3703 } 3704 PetscFunctionReturn(0); 3705 } 3706 3707 /* 
3708 Not great since it makes two copies of the submatrix, first an SeqAIJ 3709 in local and then by concatenating the local matrices the end result. 3710 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3711 3712 Note: This requires a sequential iscol with all indices. 3713 */ 3714 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3715 { 3716 PetscErrorCode ierr; 3717 PetscMPIInt rank,size; 3718 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3719 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3720 Mat M,Mreuse; 3721 MatScalar *aa,*vwork; 3722 MPI_Comm comm; 3723 Mat_SeqAIJ *aij; 3724 PetscBool colflag,allcolumns=PETSC_FALSE; 3725 3726 PetscFunctionBegin; 3727 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3728 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 3729 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 3730 3731 /* Check for special case: each processor gets entire matrix columns */ 3732 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3733 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3734 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3735 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3736 3737 if (call == MAT_REUSE_MATRIX) { 3738 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3739 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3740 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3741 } else { 3742 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3743 } 3744 3745 /* 3746 m - number of local rows 3747 n - number of columns (same on all processors) 3748 rstart - first row in new 
global matrix generated 3749 */ 3750 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3751 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3752 if (call == MAT_INITIAL_MATRIX) { 3753 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3754 ii = aij->i; 3755 jj = aij->j; 3756 3757 /* 3758 Determine the number of non-zeros in the diagonal and off-diagonal 3759 portions of the matrix in order to do correct preallocation 3760 */ 3761 3762 /* first get start and end of "diagonal" columns */ 3763 if (csize == PETSC_DECIDE) { 3764 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3765 if (mglobal == n) { /* square matrix */ 3766 nlocal = m; 3767 } else { 3768 nlocal = n/size + ((n % size) > rank); 3769 } 3770 } else { 3771 nlocal = csize; 3772 } 3773 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3774 rstart = rend - nlocal; 3775 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3776 3777 /* next, compute all the lengths */ 3778 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3779 olens = dlens + m; 3780 for (i=0; i<m; i++) { 3781 jend = ii[i+1] - ii[i]; 3782 olen = 0; 3783 dlen = 0; 3784 for (j=0; j<jend; j++) { 3785 if (*jj < rstart || *jj >= rend) olen++; 3786 else dlen++; 3787 jj++; 3788 } 3789 olens[i] = olen; 3790 dlens[i] = dlen; 3791 } 3792 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3793 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3794 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3795 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3796 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3797 ierr = PetscFree(dlens);CHKERRQ(ierr); 3798 } else { 3799 PetscInt ml,nl; 3800 3801 M = *newmat; 3802 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3803 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3804 ierr = 
MatZeroEntries(M);CHKERRQ(ierr); 3805 /* 3806 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3807 rather than the slower MatSetValues(). 3808 */ 3809 M->was_assembled = PETSC_TRUE; 3810 M->assembled = PETSC_FALSE; 3811 } 3812 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3813 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3814 ii = aij->i; 3815 jj = aij->j; 3816 3817 /* trigger copy to CPU if needed */ 3818 ierr = MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr); 3819 for (i=0; i<m; i++) { 3820 row = rstart + i; 3821 nz = ii[i+1] - ii[i]; 3822 cwork = jj; jj += nz; 3823 vwork = aa; aa += nz; 3824 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3825 } 3826 ierr = MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr); 3827 3828 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3829 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3830 *newmat = M; 3831 3832 /* save submatrix used in processor for next request */ 3833 if (call == MAT_INITIAL_MATRIX) { 3834 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3835 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3836 } 3837 PetscFunctionReturn(0); 3838 } 3839 3840 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3841 { 3842 PetscInt m,cstart, cend,j,nnz,i,d; 3843 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3844 const PetscInt *JJ; 3845 PetscErrorCode ierr; 3846 PetscBool nooffprocentries; 3847 3848 PetscFunctionBegin; 3849 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3850 3851 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3852 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3853 m = B->rmap->n; 3854 cstart = B->cmap->rstart; 3855 cend = B->cmap->rend; 3856 rstart = B->rmap->rstart; 3857 3858 ierr = 
PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3859 3860 if (PetscDefined(USE_DEBUG)) { 3861 for (i=0; i<m; i++) { 3862 nnz = Ii[i+1]- Ii[i]; 3863 JJ = J + Ii[i]; 3864 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3865 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3866 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3867 } 3868 } 3869 3870 for (i=0; i<m; i++) { 3871 nnz = Ii[i+1]- Ii[i]; 3872 JJ = J + Ii[i]; 3873 nnz_max = PetscMax(nnz_max,nnz); 3874 d = 0; 3875 for (j=0; j<nnz; j++) { 3876 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3877 } 3878 d_nnz[i] = d; 3879 o_nnz[i] = nnz - d; 3880 } 3881 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3882 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3883 3884 for (i=0; i<m; i++) { 3885 ii = i + rstart; 3886 ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr); 3887 } 3888 nooffprocentries = B->nooffprocentries; 3889 B->nooffprocentries = PETSC_TRUE; 3890 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3891 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3892 B->nooffprocentries = nooffprocentries; 3893 3894 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3895 PetscFunctionReturn(0); 3896 } 3897 3898 /*@ 3899 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3900 (the default parallel PETSc format). 3901 3902 Collective 3903 3904 Input Parameters: 3905 + B - the matrix 3906 . i - the indices into j for the start of each local row (starts with zero) 3907 . 
j - the column indices for each local row (starts with zero) 3908 - v - optional values in the matrix 3909 3910 Level: developer 3911 3912 Notes: 3913 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3914 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3915 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3916 3917 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3918 3919 The format which is used for the sparse matrix input, is equivalent to a 3920 row-major ordering.. i.e for the following matrix, the input data expected is 3921 as shown 3922 3923 $ 1 0 0 3924 $ 2 0 3 P0 3925 $ ------- 3926 $ 4 5 6 P1 3927 $ 3928 $ Process0 [P0]: rows_owned=[0,1] 3929 $ i = {0,1,3} [size = nrow+1 = 2+1] 3930 $ j = {0,0,2} [size = 3] 3931 $ v = {1,2,3} [size = 3] 3932 $ 3933 $ Process1 [P1]: rows_owned=[2] 3934 $ i = {0,3} [size = nrow+1 = 1+1] 3935 $ j = {0,1,2} [size = 3] 3936 $ v = {4,5,6} [size = 3] 3937 3938 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3939 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3940 @*/ 3941 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3942 { 3943 PetscErrorCode ierr; 3944 3945 PetscFunctionBegin; 3946 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3947 PetscFunctionReturn(0); 3948 } 3949 3950 /*@C 3951 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3952 (the default parallel PETSc format). For good matrix assembly performance 3953 the user should preallocate the matrix storage by setting the parameters 3954 d_nz (or d_nnz) and o_nz (or o_nnz). 
By setting these parameters accurately, 3955 performance can be increased by more than a factor of 50. 3956 3957 Collective 3958 3959 Input Parameters: 3960 + B - the matrix 3961 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3962 (same value is used for all local rows) 3963 . d_nnz - array containing the number of nonzeros in the various rows of the 3964 DIAGONAL portion of the local submatrix (possibly different for each row) 3965 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3966 The size of this array is equal to the number of local rows, i.e 'm'. 3967 For matrices that will be factored, you must leave room for (and set) 3968 the diagonal entry even if it is zero. 3969 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3970 submatrix (same value is used for all local rows). 3971 - o_nnz - array containing the number of nonzeros in the various rows of the 3972 OFF-DIAGONAL portion of the local submatrix (possibly different for 3973 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3974 structure. The size of this array is equal to the number 3975 of local rows, i.e 'm'. 3976 3977 If the *_nnz parameter is given then the *_nz parameter is ignored 3978 3979 The AIJ format (also called the Yale sparse matrix format or 3980 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3981 storage. The stored row and column indices begin with zero. 3982 See Users-Manual: ch_mat for details. 3983 3984 The parallel matrix is partitioned such that the first m0 rows belong to 3985 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3986 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* argument 1 must be a Mat object whose type has been set */
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* Forward all arguments unchanged to the implementation registered on B under the
     name "MatMPIAIJSetPreallocation_C".
     NOTE(review): PetscTryMethod is understood to do nothing when B's type has not
     registered that name (e.g. a sequential matrix) - confirm against the PETSc docs. */
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
     MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix from arrays that contain
         the local rows in standard CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.   j - column indices
-   a - matrix values

   Output Parameter:
.   mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4117 4118 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4119 4120 The format which is used for the sparse matrix input, is equivalent to a 4121 row-major ordering.. i.e for the following matrix, the input data expected is 4122 as shown 4123 4124 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays 4125 4126 $ 1 0 0 4127 $ 2 0 3 P0 4128 $ ------- 4129 $ 4 5 6 P1 4130 $ 4131 $ Process0 [P0]: rows_owned=[0,1] 4132 $ i = {0,1,3} [size = nrow+1 = 2+1] 4133 $ j = {0,0,2} [size = 3] 4134 $ v = {1,2,3} [size = 3] 4135 $ 4136 $ Process1 [P1]: rows_owned=[2] 4137 $ i = {0,3} [size = nrow+1 = 1+1] 4138 $ j = {0,1,2} [size = 3] 4139 $ v = {4,5,6} [size = 3] 4140 4141 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4142 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4143 @*/ 4144 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4145 { 4146 PetscErrorCode ierr; 4147 4148 PetscFunctionBegin; 4149 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4150 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4151 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4152 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4153 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4154 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4155 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4156 PetscFunctionReturn(0); 4157 } 4158 4159 /*@ 4160 MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard 4161 CSR format for the local rows. 
Only the numerical values are updated the other arrays must be identical 4162 4163 Collective 4164 4165 Input Parameters: 4166 + mat - the matrix 4167 . m - number of local rows (Cannot be PETSC_DECIDE) 4168 . n - This value should be the same as the local size used in creating the 4169 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4170 calculated if N is given) For square matrices n is almost always m. 4171 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4172 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4173 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4174 . J - column indices 4175 - v - matrix values 4176 4177 Level: intermediate 4178 4179 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4180 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4181 @*/ 4182 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4183 { 4184 PetscErrorCode ierr; 4185 PetscInt cstart,nnz,i,j; 4186 PetscInt *ld; 4187 PetscBool nooffprocentries; 4188 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4189 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data; 4190 PetscScalar *ad = Ad->a, *ao = Ao->a; 4191 const PetscInt *Adi = Ad->i; 4192 PetscInt ldi,Iii,md; 4193 4194 PetscFunctionBegin; 4195 if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4196 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4197 if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4198 if (n != mat->cmap->n) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4199 4200 cstart = mat->cmap->rstart; 4201 if (!Aij->ld) { 4202 /* count number of entries below block diagonal */ 4203 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 4204 Aij->ld = ld; 4205 for (i=0; i<m; i++) { 4206 nnz = Ii[i+1]- Ii[i]; 4207 j = 0; 4208 while (J[j] < cstart && j < nnz) {j++;} 4209 J += nnz; 4210 ld[i] = j; 4211 } 4212 } else { 4213 ld = Aij->ld; 4214 } 4215 4216 for (i=0; i<m; i++) { 4217 nnz = Ii[i+1]- Ii[i]; 4218 Iii = Ii[i]; 4219 ldi = ld[i]; 4220 md = Adi[i+1]-Adi[i]; 4221 ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr); 4222 ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr); 4223 ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr); 4224 ad += md; 4225 ao += nnz - md; 4226 } 4227 nooffprocentries = mat->nooffprocentries; 4228 mat->nooffprocentries = PETSC_TRUE; 4229 ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr); 4230 ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr); 4231 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 4232 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4233 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4234 mat->nooffprocentries = nooffprocentries; 4235 PetscFunctionReturn(0); 4236 } 4237 4238 /*@C 4239 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4240 (the default parallel PETSc format). For good matrix assembly performance 4241 the user should preallocate the matrix storage by setting the parameters 4242 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4243 performance can be increased by more than a factor of 50. 4244 4245 Collective 4246 4247 Input Parameters: 4248 + comm - MPI communicator 4249 . 
m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4250 This value should be the same as the local size used in creating the 4251 y vector for the matrix-vector product y = Ax. 4252 . n - This value should be the same as the local size used in creating the 4253 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4254 calculated if N is given) For square matrices n is almost always m. 4255 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4256 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4257 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4258 (same value is used for all local rows) 4259 . d_nnz - array containing the number of nonzeros in the various rows of the 4260 DIAGONAL portion of the local submatrix (possibly different for each row) 4261 or NULL, if d_nz is used to specify the nonzero structure. 4262 The size of this array is equal to the number of local rows, i.e 'm'. 4263 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4264 submatrix (same value is used for all local rows). 4265 - o_nnz - array containing the number of nonzeros in the various rows of the 4266 OFF-DIAGONAL portion of the local submatrix (possibly different for 4267 each row) or NULL, if o_nz is used to specify the nonzero 4268 structure. The size of this array is equal to the number 4269 of local rows, i.e 'm'. 4270 4271 Output Parameter: 4272 . A - the matrix 4273 4274 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4275 MatXXXXSetPreallocation() paradigm instead of this routine directly. 
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2 etc., where
   m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to 0th partition, the next n1 columns belonging to the next
   partition etc., where n0,n1,n2,... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor, i.e. the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1]
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4316 4317 When calling this routine with a single process communicator, a matrix of 4318 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4319 type of communicator, use the construction mechanism 4320 .vb 4321 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4322 .ve 4323 4324 $ MatCreate(...,&A); 4325 $ MatSetType(A,MATMPIAIJ); 4326 $ MatSetSizes(A, m,n,M,N); 4327 $ MatMPIAIJSetPreallocation(A,...); 4328 4329 By default, this format uses inodes (identical nodes) when possible. 4330 We search for consecutive rows with the same nonzero structure, thereby 4331 reusing matrix information to achieve increased efficiency. 4332 4333 Options Database Keys: 4334 + -mat_no_inode - Do not use inodes 4335 - -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4336 4337 4338 4339 Example usage: 4340 4341 Consider the following 8x8 matrix with 34 non-zero values, that is 4342 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4343 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4344 as follows 4345 4346 .vb 4347 1 2 0 | 0 3 0 | 0 4 4348 Proc0 0 5 6 | 7 0 0 | 8 0 4349 9 0 10 | 11 0 0 | 12 0 4350 ------------------------------------- 4351 13 0 14 | 15 16 17 | 0 0 4352 Proc1 0 18 0 | 19 20 21 | 0 0 4353 0 0 0 | 22 23 0 | 24 0 4354 ------------------------------------- 4355 Proc2 25 26 27 | 0 0 28 | 29 0 4356 30 0 0 | 31 32 33 | 0 34 4357 .ve 4358 4359 This can be represented as a collection of submatrices as 4360 4361 .vb 4362 A B C 4363 D E F 4364 G H I 4365 .ve 4366 4367 Where the submatrices A,B,C are owned by proc0, D,E,F are 4368 owned by proc1, G,H,I are owned by proc2. 4369 4370 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4371 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4372 The 'M','N' parameters are 8,8, and have the same values on all procs. 

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence pre-allocation is perfect.
4407 4408 Level: intermediate 4409 4410 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4411 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4412 @*/ 4413 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4414 { 4415 PetscErrorCode ierr; 4416 PetscMPIInt size; 4417 4418 PetscFunctionBegin; 4419 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4420 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4421 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4422 if (size > 1) { 4423 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4424 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4425 } else { 4426 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4427 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4428 } 4429 PetscFunctionReturn(0); 4430 } 4431 4432 /*@C 4433 MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix 4434 4435 Not collective 4436 4437 Input Parameter: 4438 . A - The MPIAIJ matrix 4439 4440 Output Parameters: 4441 + Ad - The local diagonal block as a SeqAIJ matrix 4442 . Ao - The local off-diagonal block as a SeqAIJ matrix 4443 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4444 4445 Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns 4446 in Ad are in [0, Nc) where Nc is the number of local columns. The columns are Ao are in [0, Nco), where Nco is 4447 the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these 4448 local column numbers to global column numbers in the original matrix. 
4449 4450 Level: intermediate 4451 4452 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ 4453 @*/ 4454 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4455 { 4456 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4457 PetscBool flg; 4458 PetscErrorCode ierr; 4459 4460 PetscFunctionBegin; 4461 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4462 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4463 if (Ad) *Ad = a->A; 4464 if (Ao) *Ao = a->B; 4465 if (colmap) *colmap = a->garray; 4466 PetscFunctionReturn(0); 4467 } 4468 4469 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4470 { 4471 PetscErrorCode ierr; 4472 PetscInt m,N,i,rstart,nnz,Ii; 4473 PetscInt *indx; 4474 PetscScalar *values; 4475 4476 PetscFunctionBegin; 4477 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4478 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4479 PetscInt *dnz,*onz,sum,bs,cbs; 4480 4481 if (n == PETSC_DECIDE) { 4482 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4483 } 4484 /* Check sum(n) = N */ 4485 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4486 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4487 4488 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 4489 rstart -= m; 4490 4491 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4492 for (i=0; i<m; i++) { 4493 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4494 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4495 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4496 } 4497 4498 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4499 ierr = 
MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4500 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4501 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4502 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4503 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4504 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4505 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4506 ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 4507 } 4508 4509 /* numeric phase */ 4510 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4511 for (i=0; i<m; i++) { 4512 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4513 Ii = i + rstart; 4514 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4515 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4516 } 4517 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4518 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4519 PetscFunctionReturn(0); 4520 } 4521 4522 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4523 { 4524 PetscErrorCode ierr; 4525 PetscMPIInt rank; 4526 PetscInt m,N,i,rstart,nnz; 4527 size_t len; 4528 const PetscInt *indx; 4529 PetscViewer out; 4530 char *name; 4531 Mat B; 4532 const PetscScalar *values; 4533 4534 PetscFunctionBegin; 4535 ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr); 4536 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 4537 /* Should this be the type of the diagonal block of A? 
*/ 4538 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4539 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4540 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4541 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4542 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4543 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 4544 for (i=0; i<m; i++) { 4545 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4546 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4547 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4548 } 4549 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4550 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4551 4552 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr); 4553 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4554 ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr); 4555 ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr); 4556 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4557 ierr = PetscFree(name);CHKERRQ(ierr); 4558 ierr = MatView(B,out);CHKERRQ(ierr); 4559 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4560 ierr = MatDestroy(&B);CHKERRQ(ierr); 4561 PetscFunctionReturn(0); 4562 } 4563 4564 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4565 { 4566 PetscErrorCode ierr; 4567 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4568 4569 PetscFunctionBegin; 4570 if (!merge) PetscFunctionReturn(0); 4571 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4572 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4573 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4574 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4575 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4576 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4577 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4578 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4579 ierr = 
PetscFree(merge->buf_rj);CHKERRQ(ierr); 4580 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4581 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4582 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4583 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4584 ierr = PetscFree(merge);CHKERRQ(ierr); 4585 PetscFunctionReturn(0); 4586 } 4587 4588 #include <../src/mat/utils/freespace.h> 4589 #include <petscbt.h> 4590 4591 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4592 { 4593 PetscErrorCode ierr; 4594 MPI_Comm comm; 4595 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4596 PetscMPIInt size,rank,taga,*len_s; 4597 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4598 PetscInt proc,m; 4599 PetscInt **buf_ri,**buf_rj; 4600 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4601 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4602 MPI_Request *s_waits,*r_waits; 4603 MPI_Status *status; 4604 MatScalar *aa=a->a; 4605 MatScalar **abuf_r,*ba_i; 4606 Mat_Merge_SeqsToMPI *merge; 4607 PetscContainer container; 4608 4609 PetscFunctionBegin; 4610 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4611 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4612 4613 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4614 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 4615 4616 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4617 if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4618 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4619 4620 bi = merge->bi; 4621 bj = merge->bj; 4622 buf_ri = merge->buf_ri; 4623 buf_rj = merge->buf_rj; 4624 4625 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4626 owners = merge->rowmap->range; 4627 len_s = merge->len_s; 4628 4629 /* send and recv matrix values */ 4630 /*-----------------------------*/ 4631 ierr = 
PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4632 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4633 4634 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4635 for (proc=0,k=0; proc<size; proc++) { 4636 if (!len_s[proc]) continue; 4637 i = owners[proc]; 4638 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRMPI(ierr); 4639 k++; 4640 } 4641 4642 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRMPI(ierr);} 4643 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRMPI(ierr);} 4644 ierr = PetscFree(status);CHKERRQ(ierr); 4645 4646 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4647 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4648 4649 /* insert mat values of mpimat */ 4650 /*----------------------------*/ 4651 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4652 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4653 4654 for (k=0; k<merge->nrecv; k++) { 4655 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4656 nrows = *(buf_ri_k[k]); 4657 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4658 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4659 } 4660 4661 /* set values of ba */ 4662 m = merge->rowmap->n; 4663 for (i=0; i<m; i++) { 4664 arow = owners[rank] + i; 4665 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4666 bnzi = bi[i+1] - bi[i]; 4667 ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr); 4668 4669 /* add local non-zero vals of this proc's seqmat into ba */ 4670 anzi = ai[arow+1] - ai[arow]; 4671 aj = a->j + ai[arow]; 4672 aa = a->a + ai[arow]; 4673 nextaj = 0; 4674 for (j=0; nextaj<anzi; j++) { 4675 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4676 ba_i[j] += aa[nextaj++]; 4677 } 4678 } 4679 4680 /* add received vals into ba */ 4681 for 
(k=0; k<merge->nrecv; k++) { /* k-th received message */ 4682 /* i-th row */ 4683 if (i == *nextrow[k]) { 4684 anzi = *(nextai[k]+1) - *nextai[k]; 4685 aj = buf_rj[k] + *(nextai[k]); 4686 aa = abuf_r[k] + *(nextai[k]); 4687 nextaj = 0; 4688 for (j=0; nextaj<anzi; j++) { 4689 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4690 ba_i[j] += aa[nextaj++]; 4691 } 4692 } 4693 nextrow[k]++; nextai[k]++; 4694 } 4695 } 4696 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4697 } 4698 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4699 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4700 4701 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4702 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4703 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4704 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4705 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4706 PetscFunctionReturn(0); 4707 } 4708 4709 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4710 { 4711 PetscErrorCode ierr; 4712 Mat B_mpi; 4713 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4714 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4715 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4716 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4717 PetscInt len,proc,*dnz,*onz,bs,cbs; 4718 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4719 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4720 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4721 MPI_Status *status; 4722 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4723 PetscBT lnkbt; 4724 Mat_Merge_SeqsToMPI *merge; 4725 PetscContainer container; 4726 4727 PetscFunctionBegin; 4728 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4729 4730 /* make sure it is a PETSc comm */ 4731 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4732 ierr = 
MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4733 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 4734 4735 ierr = PetscNew(&merge);CHKERRQ(ierr); 4736 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4737 4738 /* determine row ownership */ 4739 /*---------------------------------------------------------*/ 4740 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4741 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4742 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4743 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4744 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4745 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4746 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4747 4748 m = merge->rowmap->n; 4749 owners = merge->rowmap->range; 4750 4751 /* determine the number of messages to send, their lengths */ 4752 /*---------------------------------------------------------*/ 4753 len_s = merge->len_s; 4754 4755 len = 0; /* length of buf_si[] */ 4756 merge->nsend = 0; 4757 for (proc=0; proc<size; proc++) { 4758 len_si[proc] = 0; 4759 if (proc == rank) { 4760 len_s[proc] = 0; 4761 } else { 4762 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4763 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4764 } 4765 if (len_s[proc]) { 4766 merge->nsend++; 4767 nrows = 0; 4768 for (i=owners[proc]; i<owners[proc+1]; i++) { 4769 if (ai[i+1] > ai[i]) nrows++; 4770 } 4771 len_si[proc] = 2*(nrows+1); 4772 len += len_si[proc]; 4773 } 4774 } 4775 4776 /* determine the number and length of messages to receive for ij-structure */ 4777 /*-------------------------------------------------------------------------*/ 4778 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4779 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4780 4781 /* post the Irecv of j-structure */ 4782 
/*-------------------------------*/ 4783 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4784 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4785 4786 /* post the Isend of j-structure */ 4787 /*--------------------------------*/ 4788 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4789 4790 for (proc=0, k=0; proc<size; proc++) { 4791 if (!len_s[proc]) continue; 4792 i = owners[proc]; 4793 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRMPI(ierr); 4794 k++; 4795 } 4796 4797 /* receives and sends of j-structure are complete */ 4798 /*------------------------------------------------*/ 4799 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRMPI(ierr);} 4800 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRMPI(ierr);} 4801 4802 /* send and recv i-structure */ 4803 /*---------------------------*/ 4804 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4805 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4806 4807 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4808 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4809 for (proc=0,k=0; proc<size; proc++) { 4810 if (!len_s[proc]) continue; 4811 /* form outgoing message for i-structure: 4812 buf_si[0]: nrows to be sent 4813 [1:nrows]: row index (global) 4814 [nrows+1:2*nrows+1]: i-structure index 4815 */ 4816 /*-------------------------------------------*/ 4817 nrows = len_si[proc]/2 - 1; 4818 buf_si_i = buf_si + nrows+1; 4819 buf_si[0] = nrows; 4820 buf_si_i[0] = 0; 4821 nrows = 0; 4822 for (i=owners[proc]; i<owners[proc+1]; i++) { 4823 anzi = ai[i+1] - ai[i]; 4824 if (anzi) { 4825 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4826 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4827 nrows++; 4828 } 4829 } 4830 ierr = 
MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRMPI(ierr); 4831 k++; 4832 buf_si += len_si[proc]; 4833 } 4834 4835 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRMPI(ierr);} 4836 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRMPI(ierr);} 4837 4838 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4839 for (i=0; i<merge->nrecv; i++) { 4840 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4841 } 4842 4843 ierr = PetscFree(len_si);CHKERRQ(ierr); 4844 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4845 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4846 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4847 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4848 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4849 ierr = PetscFree(status);CHKERRQ(ierr); 4850 4851 /* compute a local seq matrix in each processor */ 4852 /*----------------------------------------------*/ 4853 /* allocate bi array and free space for accumulating nonzero column info */ 4854 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4855 bi[0] = 0; 4856 4857 /* create and initialize a linked list */ 4858 nlnk = N+1; 4859 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4860 4861 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4862 len = ai[owners[rank+1]] - ai[owners[rank]]; 4863 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4864 4865 current_space = free_space; 4866 4867 /* determine symbolic info for each local row */ 4868 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4869 4870 for (k=0; k<merge->nrecv; k++) { 4871 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4872 nrows = *buf_ri_k[k]; 4873 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4874 nextai[k] = buf_ri_k[k] + (nrows + 
1); /* poins to the next i-structure of k-th recved i-structure */ 4875 } 4876 4877 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4878 len = 0; 4879 for (i=0; i<m; i++) { 4880 bnzi = 0; 4881 /* add local non-zero cols of this proc's seqmat into lnk */ 4882 arow = owners[rank] + i; 4883 anzi = ai[arow+1] - ai[arow]; 4884 aj = a->j + ai[arow]; 4885 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4886 bnzi += nlnk; 4887 /* add received col data into lnk */ 4888 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4889 if (i == *nextrow[k]) { /* i-th row */ 4890 anzi = *(nextai[k]+1) - *nextai[k]; 4891 aj = buf_rj[k] + *nextai[k]; 4892 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4893 bnzi += nlnk; 4894 nextrow[k]++; nextai[k]++; 4895 } 4896 } 4897 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4898 4899 /* if free space is not available, make more free space */ 4900 if (current_space->local_remaining<bnzi) { 4901 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4902 nspacedouble++; 4903 } 4904 /* copy data into free space, then initialize lnk */ 4905 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4906 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4907 4908 current_space->array += bnzi; 4909 current_space->local_used += bnzi; 4910 current_space->local_remaining -= bnzi; 4911 4912 bi[i+1] = bi[i] + bnzi; 4913 } 4914 4915 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4916 4917 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4918 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4919 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4920 4921 /* create symbolic parallel matrix B_mpi */ 4922 /*---------------------------------------*/ 4923 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4924 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4925 if 
(n==PETSC_DECIDE) { 4926 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4927 } else { 4928 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4929 } 4930 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4931 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4932 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4933 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4934 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4935 4936 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4937 B_mpi->assembled = PETSC_FALSE; 4938 merge->bi = bi; 4939 merge->bj = bj; 4940 merge->buf_ri = buf_ri; 4941 merge->buf_rj = buf_rj; 4942 merge->coi = NULL; 4943 merge->coj = NULL; 4944 merge->owners_co = NULL; 4945 4946 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4947 4948 /* attach the supporting struct to B_mpi for reuse */ 4949 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4950 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4951 ierr = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr); 4952 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4953 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4954 *mpimat = B_mpi; 4955 4956 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4957 PetscFunctionReturn(0); 4958 } 4959 4960 /*@C 4961 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4962 matrices from each processor 4963 4964 Collective 4965 4966 Input Parameters: 4967 + comm - the communicators the parallel matrix will live on 4968 . seqmat - the input sequential matrices 4969 . m - number of local rows (or PETSC_DECIDE) 4970 . n - number of local columns (or PETSC_DECIDE) 4971 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4972 4973 Output Parameter: 4974 . 
mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscMPIInt    nproc;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&nproc);CHKERRMPI(ierr);

  /* Both the one-process and the multi-process paths are logged under MAT_Seqstompi */
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (nproc != 1) {
    /* The symbolic phase is only run for a new matrix; the numeric phase runs on every call */
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
    }
    ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
  } else if (scall == MAT_INITIAL_MATRIX) {
    /* One process: the "sum" is seqmat itself, so hand back a copy (m and n are not used here) */
    ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
  } else {
    /* One process, any other scall value: overwrite the values of the existing *mpimat */
    ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize()

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.
A_loc - the local sequential matrix generated

    Level: developer

   Notes:
     When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
     If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
     This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
     modify the values of the returned A_loc.

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge()
@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode    ierr;
  Mat_MPIAIJ        *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ        *mat,*a,*b;
  PetscInt          *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
  const PetscScalar *aa,*ba,*aav,*bav;
  PetscScalar       *ca,*cam;
  PetscMPIInt       size;
  PetscInt          am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt          *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool         match;

  PetscFunctionBegin;
  /* Prefix match on the type name: any type whose name starts with MATMPIAIJ is accepted */
  ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
  if (size == 1) {
    /* One process: the diagonal block is used directly (new reference) or copied into *A_loc.
       NOTE(review): any other scall value returns success here without touching *A_loc,
       whereas the multi-process path below raises PETSC_ERR_ARG_WRONG. */
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr);
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  }

  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  a = (Mat_SeqAIJ*)(mpimat->A)->data;
  b = (Mat_SeqAIJ*)(mpimat->B)->data;
  ai = a->i; aj = a->j; bi = b->i; bj = b->j;
  /* aav/bav keep the pointers returned by GetArrayRead so they can be restored unchanged;
     aa/ba (and aj/bj) are cursors that advance through the whole arrays, row after row */
  ierr = MatSeqAIJGetArrayRead(mpimat->A,&aav);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(mpimat->B,&bav);CHKERRQ(ierr);
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* Row pointers of the merged matrix: each row holds its diagonal plus off-diagonal entries */
    ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++) {
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
    ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
    k    = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal entries whose global column (via cmap) lies below cstart come first;
         jo records how many of this row's off-diagonal entries were consumed here */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A: local column index shifted by cstart to a global one */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* remaining off-diagonal entries of this row (those not consumed by the first loop) */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    mat=(Mat_SeqAIJ*)(*A_loc)->data;
    /* NOTE(review): MatMPIAIJGetLocalMatMerge() guards this same statement with PETSC_HAVE_DEVICE;
       confirm which macro the build actually defines, otherwise this line may never be compiled. */
#if defined(PETSC_USE_DEVICE)
    (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU;
#endif
    /* Only the values are rewritten, in the same order the initial path laid them out;
       ci and cj are assigned here but not read afterwards */
    ci = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A (columns below cstart) */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A (remaining columns) */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); /* NOTE(review): exits without the restore/log-end calls below */
  ierr = MatSeqAIJRestoreArrayRead(mpimat->A,&aav);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(mpimat->B,&bav);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
     MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns.
Where n is the sum of the number of columns of the diagonal and offdiagonal part 5138 5139 Not Collective 5140 5141 Input Parameters: 5142 + A - the matrix 5143 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5144 5145 Output Parameter: 5146 + glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL) 5147 - A_loc - the local sequential matrix generated 5148 5149 Level: developer 5150 5151 Notes: 5152 This is different from MatMPIAIJGetLocalMat() since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering) 5153 5154 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed() 5155 5156 @*/ 5157 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc) 5158 { 5159 PetscErrorCode ierr; 5160 Mat Ao,Ad; 5161 const PetscInt *cmap; 5162 PetscMPIInt size; 5163 PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*); 5164 5165 PetscFunctionBegin; 5166 ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap);CHKERRQ(ierr); 5167 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 5168 if (size == 1) { 5169 if (scall == MAT_INITIAL_MATRIX) { 5170 ierr = PetscObjectReference((PetscObject)Ad);CHKERRQ(ierr); 5171 *A_loc = Ad; 5172 } else if (scall == MAT_REUSE_MATRIX) { 5173 ierr = MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5174 } 5175 if (glob) { ierr = ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob);CHKERRQ(ierr); } 5176 PetscFunctionReturn(0); 5177 } 5178 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f);CHKERRQ(ierr); 5179 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5180 if (f) { 5181 ierr = (*f)(A,scall,glob,A_loc);CHKERRQ(ierr); 5182 } else { 5183 Mat_SeqAIJ *a = (Mat_SeqAIJ*)Ad->data; 5184 Mat_SeqAIJ *b = (Mat_SeqAIJ*)Ao->data; 5185 Mat_SeqAIJ *c; 
5186 PetscInt *ai = a->i, *aj = a->j; 5187 PetscInt *bi = b->i, *bj = b->j; 5188 PetscInt *ci,*cj; 5189 const PetscScalar *aa,*ba; 5190 PetscScalar *ca; 5191 PetscInt i,j,am,dn,on; 5192 5193 ierr = MatGetLocalSize(Ad,&am,&dn);CHKERRQ(ierr); 5194 ierr = MatGetLocalSize(Ao,NULL,&on);CHKERRQ(ierr); 5195 ierr = MatSeqAIJGetArrayRead(Ad,&aa);CHKERRQ(ierr); 5196 ierr = MatSeqAIJGetArrayRead(Ao,&ba);CHKERRQ(ierr); 5197 if (scall == MAT_INITIAL_MATRIX) { 5198 PetscInt k; 5199 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5200 ierr = PetscMalloc1(ai[am]+bi[am],&cj);CHKERRQ(ierr); 5201 ierr = PetscMalloc1(ai[am]+bi[am],&ca);CHKERRQ(ierr); 5202 ci[0] = 0; 5203 for (i=0,k=0; i<am; i++) { 5204 const PetscInt ncols_o = bi[i+1] - bi[i]; 5205 const PetscInt ncols_d = ai[i+1] - ai[i]; 5206 ci[i+1] = ci[i] + ncols_o + ncols_d; 5207 /* diagonal portion of A */ 5208 for (j=0; j<ncols_d; j++,k++) { 5209 cj[k] = *aj++; 5210 ca[k] = *aa++; 5211 } 5212 /* off-diagonal portion of A */ 5213 for (j=0; j<ncols_o; j++,k++) { 5214 cj[k] = dn + *bj++; 5215 ca[k] = *ba++; 5216 } 5217 } 5218 /* put together the new matrix */ 5219 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc);CHKERRQ(ierr); 5220 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5221 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5222 c = (Mat_SeqAIJ*)(*A_loc)->data; 5223 c->free_a = PETSC_TRUE; 5224 c->free_ij = PETSC_TRUE; 5225 c->nonew = 0; 5226 ierr = MatSetType(*A_loc,((PetscObject)Ad)->type_name);CHKERRQ(ierr); 5227 } else if (scall == MAT_REUSE_MATRIX) { 5228 #if defined(PETSC_HAVE_DEVICE) 5229 (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU; 5230 #endif 5231 c = (Mat_SeqAIJ*)(*A_loc)->data; 5232 ca = c->a; 5233 for (i=0; i<am; i++) { 5234 const PetscInt ncols_d = ai[i+1] - ai[i]; 5235 const PetscInt ncols_o = bi[i+1] - bi[i]; 5236 /* diagonal portion of A */ 5237 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 5238 /* off-diagonal portion of A */ 5239 for (j=0; j<ncols_o; j++) *ca++ = *ba++; 5240 } 5241 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5242 ierr = MatSeqAIJRestoreArrayRead(Ad,&aa);CHKERRQ(ierr); 5243 ierr = MatSeqAIJRestoreArrayRead(Ao,&aa);CHKERRQ(ierr); 5244 if (glob) { 5245 PetscInt cst, *gidx; 5246 5247 ierr = MatGetOwnershipRangeColumn(A,&cst,NULL);CHKERRQ(ierr); 5248 ierr = PetscMalloc1(dn+on,&gidx);CHKERRQ(ierr); 5249 for (i=0; i<dn; i++) gidx[i] = cst + i; 5250 for (i=0; i<on; i++) gidx[i+dn] = cmap[i]; 5251 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob);CHKERRQ(ierr); 5252 } 5253 } 5254 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5255 PetscFunctionReturn(0); 5256 } 5257 5258 /*@C 5259 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5260 5261 Not Collective 5262 5263 Input Parameters: 5264 + A - the matrix 5265 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5266 - row, col - index sets of rows and columns to extract (or NULL) 5267 5268 Output Parameter: 5269 . 
A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()

@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row) {
    /* default row set: every locally owned row of A */
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* default column set, in global numbering: garray entries below the owned range,
       then the owned columns, then the remaining garray entries */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i; /* first garray entry that is not below the owned column range */
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* MatCreateSubMatrices() works on an array of matrices: wrap the caller's matrix in a one-entry array */
    ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  if (!col) { /* attach global id of condensed columns */
    ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
  }
  /* hand the matrix to the caller and release only the one-entry array that held it */
  *A_loc = aloc[0];
  ierr   = PetscFree(aloc);CHKERRQ(ierr);
  /* index sets created above are released here; caller-supplied ones are left alone */
  if (!row) {
    ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
  }
  if (!col) {
    ierr = ISDestroy(&iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
 * Create a sequential AIJ matrix P_oth from the rows of P listed in 'rows'; for every requested
 * row, all of its entries (diagonal-block and off-diagonal-block part) are fetched.
 * A row could be local or remote. The routine is designed to be scalable in memory so that nothing
 * is based on a global size.
 *
 * The two PetscSF objects used for the transfer are composed on P_oth under the names
 * "diagsf" and "offdiagsf".
 * */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
{
  Mat_MPIAIJ             *p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ             *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
  PetscInt               plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
  PetscMPIInt            owner;
  PetscSFNode            *iremote,*oiremote;
  const PetscInt         *lrowindices;
  PetscErrorCode         ierr;
  PetscSF                sf,osf;
  PetscInt               pcstart,*roffsets,*loffsets,*pnnz,j;
  PetscInt               ontotalcols,dntotalcols,ntotalcols,nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
  ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
  ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
  ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
  for (i=0;i<nrows;i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);

  /* Root data is interleaved per row as (diag, off-diag) pairs, which is why the
     broadcasts below use MPIU_2INT */
  ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
  ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
  ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i=0;i<plocalsize;i++) {
    /* diag */
    nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
    /* off diag */
    nrcols[i*2+1] = po->i[i+1] - po->i[i];
    /* compute offsets so that we know the relative location of each row */
    roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
    roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
  }
  ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
  ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
  /* 'r' means root, and 'l' means leaf */
  ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  ierr = PetscFree(roffsets);CHKERRQ(ierr);
  ierr = PetscFree(nrcols);CHKERRQ(ierr);
  /* Per requested row: total length (pnnz), and running totals of diag / off-diag entries */
  dntotalcols = 0;
  ontotalcols = 0;
  ncol = 0;
  for (i=0;i<nrows;i++) {
    pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
    ncol = PetscMax(pnnz[i],ncol);
    /* diag */
    dntotalcols += nlcols[i*2+0];
    /* off diag */
    ontotalcols += nlcols[i*2+1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
  ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
  ierr = PetscFree(pnnz);CHKERRQ(ierr);
  p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
  /* diag */
  ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
  /* off diag */
  ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
  /* diag */
  ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
  /* off diag */
  ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
  /* The counters are reset and reused as fill positions; ntotalcols walks the storage of P_oth,
     so within each row the diag entries are placed first and the off-diag entries after them */
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i=0;i<nrows;i++) {
    owner = 0;
    ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
    /* Set iremote for diag matrix */
    for (j=0;j<nlcols[i*2+0];j++) {
      iremote[dntotalcols].index   = loffsets[i*2+0] + j;
      iremote[dntotalcols].rank    = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++]        = ntotalcols++;
    }
    /* off diag */
    for (j=0;j<nlcols[i*2+1];j++) {
      oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
      oiremote[ontotalcols].rank    = owner;
      oilocal[ontotalcols++]        = ntotalcols++;
    }
  }
  ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
  ierr = PetscFree(loffsets);CHKERRQ(ierr);
  ierr = PetscFree(nlcols);CHKERRQ(ierr);
  ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);

  ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
  /* Off diag */
  ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
  ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
  /* We operate on the matrix internal data for saving memory */
  ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
  ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
  /* Convert to global indices for diag matrix: pd->j is modified in place and
     shifted back once the broadcast that reads it has ended */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
  ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
  /* We want P_oth store global indices */
  ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
  /* Use memory scalable approach */
  ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
  /* po->j is likewise translated to global indices in place for the duration of the broadcast */
  ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
  /* Convert back to local indices */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
  ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
  /* Translate po->j back to local indices; every entry must map back, otherwise it is an error */
  nout = 0;
  ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
  if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout);
  ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
  /* Exchange values */
  ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
  /* Stop PETSc from shrinking memory */
  for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
  ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
 * Creates a SeqAIJ matrix P_oth from the rows of P selected by the off-diagonal column map of A
 * (a->garray, each entry divided by dof).
 * This supports MPIAIJ and MAIJ
 *
 * With MAT_INITIAL_MATRIX, P_oth is (re)built and an IS named "aoffdiagtopothmapping" is composed on it.
 * With MAT_REUSE_MATRIX, only the values of P_oth are refreshed through the "diagsf" and "offdiagsf"
 * objects composed on it.
 * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
{
  Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ            *p_oth;
  Mat_SeqAIJ            *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
  IS                    rows,map;
  PetscHMapI            hamp;
  PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
  MPI_Comm              comm;
  PetscSF               sf,osf;
  PetscBool             has;
  PetscErrorCode        ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   * and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
    ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
    ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense */
    for (i=0;i<a->B->cmap->n;i++) {
      key  = a->garray[i]/dof;
      ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
      if (!has) {
        /* first occurrence of this key: it becomes row number 'count' of P_oth */
        mapping[i] = count;
        ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
      } else {
        /* Current 'i' has the same key as the previous step (relies on the sortedness assumed above) */
        mapping[i] = count-1;
      }
    }
    ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
    ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
    /* NOTE(review): the CHKERRQ after SETERRQ2 on the next line looks redundant */
    if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);CHKERRQ(ierr);
    ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
    off = 0;
    ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
    ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
    /* the keys are sorted before being used as the row list */
    ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
    /* In case the matrix was already created but users want to recreate the matrix */
    ierr = MatDestroy(P_oth);CHKERRQ(ierr);
    ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
    ierr = ISDestroy(&map);CHKERRQ(ierr);
    ierr = ISDestroy(&rows);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that were attached to the matrix earlier.
     * */
    ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
    ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
    if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
    /* Update values in place */
    ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
    ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
  } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or NULL)

   Output Parameter:
+    rowb, colb - index sets of rows and columns of B to extract
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  Mat            *submat = NULL;
  IS             rowis,colis;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* The column layout of A has to coincide with the row layout of B on this process */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall != MAT_INITIAL_MATRIX) {
    /* Reuse: the caller supplies the index sets and the matrix from an earlier call */
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    rowis = *rowb;
    colis = *colb;
    ierr = PetscMalloc1(1,&submat);CHKERRQ(ierr);
    submat[0] = *B_seq;
  } else {
    const PetscInt first   = A->cmap->rstart;
    const PetscInt *garray = aij->garray;
    const PetscInt ndiag   = aij->A->cmap->n;
    const PetscInt noff    = aij->B->cmap->n;
    PetscInt       *rowidx,nlow = 0,cnt = 0,k;

    /* Rows of B to fetch, in global numbering: garray entries below the owned range,
       then the owned range itself, then the remaining garray entries */
    ierr = PetscMalloc1(ndiag+noff,&rowidx);CHKERRQ(ierr);
    while (nlow < noff && garray[nlow] < first) {
      rowidx[cnt++] = garray[nlow];
      nlow++;
    }
    for (k=0; k<ndiag; k++) rowidx[cnt++] = first + k;
    for (k=nlow; k<noff; k++) rowidx[cnt++] = garray[k];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,cnt,rowidx,PETSC_OWN_POINTER,&rowis);CHKERRQ(ierr);
    /* every column of B is kept */
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&colis);CHKERRQ(ierr);
  }

  ierr = MatCreateSubMatrices(B,1,&rowis,&colis,scall,&submat);CHKERRQ(ierr);
  /* keep the matrix, drop only the one-entry array that carried it */
  *B_seq = submat[0];
  ierr = PetscFree(submat);CHKERRQ(ierr);

  /* Each index set is returned through its pointer when one was given, otherwise destroyed */
  if (rowb) {
    *rowb = rowis;
  } else {
    ierr = ISDestroy(&rowis);CHKERRQ(ierr);
  }
  if (colb) {
    *colb = colis;
  } else {
    ierr = ISDestroy(&colis);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
-    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

    Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable..
5665 5666 Level: developer 5667 5668 */ 5669 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5670 { 5671 PetscErrorCode ierr; 5672 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5673 Mat_SeqAIJ *b_oth; 5674 VecScatter ctx; 5675 MPI_Comm comm; 5676 const PetscMPIInt *rprocs,*sprocs; 5677 const PetscInt *srow,*rstarts,*sstarts; 5678 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5679 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len; 5680 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5681 MPI_Request *rwaits = NULL,*swaits = NULL; 5682 MPI_Status rstatus; 5683 PetscMPIInt size,tag,rank,nsends_mpi,nrecvs_mpi; 5684 PETSC_UNUSED PetscMPIInt jj; 5685 5686 PetscFunctionBegin; 5687 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5688 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 5689 5690 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5691 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5692 } 5693 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5694 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 5695 5696 if (size == 1) { 5697 startsj_s = NULL; 5698 bufa_ptr = NULL; 5699 *B_oth = NULL; 5700 PetscFunctionReturn(0); 5701 } 5702 5703 ctx = a->Mvctx; 5704 tag = ((PetscObject)ctx)->tag; 5705 5706 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5707 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5708 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5709 ierr = 
PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr); 5710 ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr); 5711 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5712 5713 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5714 if (scall == MAT_INITIAL_MATRIX) { 5715 /* i-array */ 5716 /*---------*/ 5717 /* post receives */ 5718 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */ 5719 for (i=0; i<nrecvs; i++) { 5720 rowlen = rvalues + rstarts[i]*rbs; 5721 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5722 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5723 } 5724 5725 /* pack the outgoing message */ 5726 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5727 5728 sstartsj[0] = 0; 5729 rstartsj[0] = 0; 5730 len = 0; /* total length of j or a array to be sent */ 5731 if (nsends) { 5732 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5733 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5734 } 5735 for (i=0; i<nsends; i++) { 5736 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5737 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5738 for (j=0; j<nrows; j++) { 5739 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5740 for (l=0; l<sbs; l++) { 5741 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5742 5743 rowlen[j*sbs+l] = ncols; 5744 5745 len += ncols; 5746 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5747 } 5748 k++; 5749 } 5750 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5751 5752 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5753 } 5754 /* recvs and sends of i-array are completed */ 5755 i = nrecvs; 5756 while (i--) { 5757 ierr = 
MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRMPI(ierr); 5758 } 5759 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5760 ierr = PetscFree(svalues);CHKERRQ(ierr); 5761 5762 /* allocate buffers for sending j and a arrays */ 5763 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5764 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5765 5766 /* create i-array of B_oth */ 5767 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5768 5769 b_othi[0] = 0; 5770 len = 0; /* total length of j or a array to be received */ 5771 k = 0; 5772 for (i=0; i<nrecvs; i++) { 5773 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5774 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5775 for (j=0; j<nrows; j++) { 5776 b_othi[k+1] = b_othi[k] + rowlen[j]; 5777 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5778 k++; 5779 } 5780 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5781 } 5782 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5783 5784 /* allocate space for j and a arrrays of B_oth */ 5785 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5786 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5787 5788 /* j-array */ 5789 /*---------*/ 5790 /* post receives of j-array */ 5791 for (i=0; i<nrecvs; i++) { 5792 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5793 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5794 } 5795 5796 /* pack the outgoing message j-array */ 5797 if (nsends) k = sstarts[0]; 5798 for (i=0; i<nsends; i++) { 5799 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5800 bufJ = bufj+sstartsj[i]; 5801 for (j=0; j<nrows; j++) { 5802 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5803 for (ll=0; ll<sbs; ll++) { 5804 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5805 for (l=0; l<ncols; l++) { 5806 *bufJ++ = cols[l]; 5807 } 5808 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5809 } 5810 } 5811 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5812 } 5813 5814 /* recvs and sends of j-array are completed */ 5815 i = nrecvs; 5816 while (i--) { 5817 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRMPI(ierr); 5818 } 5819 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5820 } else if (scall == MAT_REUSE_MATRIX) { 5821 sstartsj = *startsj_s; 5822 rstartsj = *startsj_r; 5823 bufa = *bufa_ptr; 5824 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5825 b_otha = b_oth->a; 5826 #if defined(PETSC_HAVE_DEVICE) 5827 (*B_oth)->offloadmask = PETSC_OFFLOAD_CPU; 5828 #endif 5829 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5830 5831 /* a-array */ 5832 /*---------*/ 5833 /* post receives of a-array */ 5834 for (i=0; i<nrecvs; i++) { 5835 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5836 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5837 } 5838 5839 /* pack the outgoing message a-array */ 5840 if (nsends) k = sstarts[0]; 5841 for (i=0; i<nsends; i++) { 5842 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5843 bufA = bufa+sstartsj[i]; 5844 for (j=0; j<nrows; j++) { 5845 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5846 for (ll=0; ll<sbs; ll++) { 5847 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5848 for (l=0; l<ncols; l++) { 5849 *bufA++ = vals[l]; 5850 } 5851 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5852 } 5853 } 5854 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5855 } 5856 /* recvs and sends of a-array are completed */ 5857 i = nrecvs; 5858 while (i--) { 5859 ierr = 
MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRMPI(ierr); 5860 } 5861 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5862 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5863 5864 if (scall == MAT_INITIAL_MATRIX) { 5865 /* put together the new matrix */ 5866 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5867 5868 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5869 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5870 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5871 b_oth->free_a = PETSC_TRUE; 5872 b_oth->free_ij = PETSC_TRUE; 5873 b_oth->nonew = 0; 5874 5875 ierr = PetscFree(bufj);CHKERRQ(ierr); 5876 if (!startsj_s || !bufa_ptr) { 5877 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5878 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5879 } else { 5880 *startsj_s = sstartsj; 5881 *startsj_r = rstartsj; 5882 *bufa_ptr = bufa; 5883 } 5884 } 5885 5886 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5887 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 5888 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5889 PetscFunctionReturn(0); 5890 } 5891 5892 /*@C 5893 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5894 5895 Not Collective 5896 5897 Input Parameters: 5898 . A - The matrix in mpiaij format 5899 5900 Output Parameter: 5901 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5902 . 
colmap - A map from global column index to local index into lvec 5903 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5904 5905 Level: developer 5906 5907 @*/ 5908 #if defined(PETSC_USE_CTABLE) 5909 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5910 #else 5911 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5912 #endif 5913 { 5914 Mat_MPIAIJ *a; 5915 5916 PetscFunctionBegin; 5917 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5918 PetscValidPointer(lvec, 2); 5919 PetscValidPointer(colmap, 3); 5920 PetscValidPointer(multScatter, 4); 5921 a = (Mat_MPIAIJ*) A->data; 5922 if (lvec) *lvec = a->lvec; 5923 if (colmap) *colmap = a->colmap; 5924 if (multScatter) *multScatter = a->Mvctx; 5925 PetscFunctionReturn(0); 5926 } 5927 5928 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5929 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5930 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5931 #if defined(PETSC_HAVE_MKL_SPARSE) 5932 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5933 #endif 5934 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*); 5935 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5936 #if defined(PETSC_HAVE_ELEMENTAL) 5937 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5938 #endif 5939 #if defined(PETSC_HAVE_SCALAPACK) 5940 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*); 5941 #endif 5942 #if defined(PETSC_HAVE_HYPRE) 5943 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5944 #endif 5945 #if defined(PETSC_HAVE_CUDA) 5946 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*); 5947 
#endif 5948 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 5949 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*); 5950 #endif 5951 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5952 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5953 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 5954 5955 /* 5956 Computes (B'*A')' since computing B*A directly is untenable 5957 5958 n p p 5959 [ ] [ ] [ ] 5960 m [ A ] * n [ B ] = m [ C ] 5961 [ ] [ ] [ ] 5962 5963 */ 5964 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5965 { 5966 PetscErrorCode ierr; 5967 Mat At,Bt,Ct; 5968 5969 PetscFunctionBegin; 5970 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5971 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5972 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr); 5973 ierr = MatDestroy(&At);CHKERRQ(ierr); 5974 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5975 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5976 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5977 PetscFunctionReturn(0); 5978 } 5979 5980 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C) 5981 { 5982 PetscErrorCode ierr; 5983 PetscBool cisdense; 5984 5985 PetscFunctionBegin; 5986 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5987 ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr); 5988 ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr); 5989 ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr); 5990 if (!cisdense) { 5991 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 5992 } 5993 ierr = MatSetUp(C);CHKERRQ(ierr); 5994 5995 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5996 PetscFunctionReturn(0); 5997 } 
5998 5999 /* ----------------------------------------------------------------*/ 6000 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6001 { 6002 Mat_Product *product = C->product; 6003 Mat A = product->A,B=product->B; 6004 6005 PetscFunctionBegin; 6006 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) 6007 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 6008 6009 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6010 C->ops->productsymbolic = MatProductSymbolic_AB; 6011 PetscFunctionReturn(0); 6012 } 6013 6014 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6015 { 6016 PetscErrorCode ierr; 6017 Mat_Product *product = C->product; 6018 6019 PetscFunctionBegin; 6020 if (product->type == MATPRODUCT_AB) { 6021 ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr); 6022 } 6023 PetscFunctionReturn(0); 6024 } 6025 /* ----------------------------------------------------------------*/ 6026 6027 /*MC 6028 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6029 6030 Options Database Keys: 6031 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 6032 6033 Level: beginner 6034 6035 Notes: 6036 MatSetValues() may be called for this matrix type with a NULL argument for the numerical values, 6037 in this case the values associated with the rows and columns one passes in are set to zero 6038 in the matrix 6039 6040 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. 
In this no 6041 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 6042 6043 .seealso: MatCreateAIJ() 6044 M*/ 6045 6046 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6047 { 6048 Mat_MPIAIJ *b; 6049 PetscErrorCode ierr; 6050 PetscMPIInt size; 6051 6052 PetscFunctionBegin; 6053 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr); 6054 6055 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 6056 B->data = (void*)b; 6057 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 6058 B->assembled = PETSC_FALSE; 6059 B->insertmode = NOT_SET_VALUES; 6060 b->size = size; 6061 6062 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr); 6063 6064 /* build cache for off array entries formed */ 6065 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 6066 6067 b->donotstash = PETSC_FALSE; 6068 b->colmap = NULL; 6069 b->garray = NULL; 6070 b->roworiented = PETSC_TRUE; 6071 6072 /* stuff used for matrix vector multiply */ 6073 b->lvec = NULL; 6074 b->Mvctx = NULL; 6075 6076 /* stuff for MatGetRow() */ 6077 b->rowindices = NULL; 6078 b->rowvalues = NULL; 6079 b->getrowactive = PETSC_FALSE; 6080 6081 /* flexible pointer used in CUSPARSE classes */ 6082 b->spptr = NULL; 6083 6084 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 6085 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 6086 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 6087 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 6088 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 6089 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 6090 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 6091 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 6092 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 6093 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr); 6094 #if defined(PETSC_HAVE_CUDA) 6095 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE);CHKERRQ(ierr); 6096 #endif 6097 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6098 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos);CHKERRQ(ierr); 6099 #endif 6100 #if defined(PETSC_HAVE_MKL_SPARSE) 6101 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 6102 #endif 6103 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 6104 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr); 6105 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 6106 #if defined(PETSC_HAVE_ELEMENTAL) 6107 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 6108 #endif 6109 #if defined(PETSC_HAVE_SCALAPACK) 6110 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr); 6111 #endif 6112 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 6113 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 6114 #if defined(PETSC_HAVE_HYPRE) 6115 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 6116 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr); 6117 #endif 6118 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr); 6119 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr); 6120 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 6121 PetscFunctionReturn(0); 6122 } 6123 6124 /*@C 6125 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 6126 and "off-diagonal" part of the matrix in CSR format. 6127 6128 Collective 6129 6130 Input Parameters: 6131 + comm - MPI communicator 6132 . m - number of local rows (Cannot be PETSC_DECIDE) 6133 . n - This value should be the same as the local size used in creating the 6134 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 6135 calculated if N is given) For square matrices n is almost always m. 6136 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 6137 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 6138 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6139 . j - column indices 6140 . a - matrix values 6141 . 
oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6142 . oj - column indices 6143 - oa - matrix values 6144 6145 Output Parameter: 6146 . mat - the matrix 6147 6148 Level: advanced 6149 6150 Notes: 6151 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6152 must free the arrays once the matrix has been destroyed and not before. 6153 6154 The i and j indices are 0 based 6155 6156 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6157 6158 This sets local rows and cannot be used to set off-processor values. 6159 6160 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6161 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6162 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6163 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6164 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 6165 communication if it is known that only local entries will be set. 
6166 6167 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 6168 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 6169 @*/ 6170 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 6171 { 6172 PetscErrorCode ierr; 6173 Mat_MPIAIJ *maij; 6174 6175 PetscFunctionBegin; 6176 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6177 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 6178 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 6179 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 6180 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 6181 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 6182 maij = (Mat_MPIAIJ*) (*mat)->data; 6183 6184 (*mat)->preallocated = PETSC_TRUE; 6185 6186 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 6187 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 6188 6189 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 6190 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 6191 6192 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6193 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6194 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6195 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6196 6197 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 6198 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6199 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6200 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 6201 ierr = 
MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6202 PetscFunctionReturn(0); 6203 } 6204 6205 /* 6206 Special version for direct calls from Fortran 6207 */ 6208 #include <petsc/private/fortranimpl.h> 6209 6210 /* Change these macros so can be used in void function */ 6211 #undef CHKERRQ 6212 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 6213 #undef SETERRQ2 6214 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 6215 #undef SETERRQ3 6216 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 6217 #undef SETERRQ 6218 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 6219 6220 #if defined(PETSC_HAVE_FORTRAN_CAPS) 6221 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 6222 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 6223 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 6224 #else 6225 #endif 6226 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 6227 { 6228 Mat mat = *mmat; 6229 PetscInt m = *mm, n = *mn; 6230 InsertMode addv = *maddv; 6231 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 6232 PetscScalar value; 6233 PetscErrorCode ierr; 6234 6235 MatCheckPreallocated(mat,1); 6236 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 6237 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 6238 { 6239 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 6240 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 6241 PetscBool roworiented = aij->roworiented; 6242 6243 /* Some Variables required in the macro */ 6244 Mat A = aij->A; 6245 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 6246 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 6247 MatScalar *aa = a->a; 6248 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? 
PETSC_TRUE : PETSC_FALSE); 6249 Mat B = aij->B; 6250 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 6251 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 6252 MatScalar *ba = b->a; 6253 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 6254 * cannot use "#if defined" inside a macro. */ 6255 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 6256 6257 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 6258 PetscInt nonew = a->nonew; 6259 MatScalar *ap1,*ap2; 6260 6261 PetscFunctionBegin; 6262 for (i=0; i<m; i++) { 6263 if (im[i] < 0) continue; 6264 if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 6265 if (im[i] >= rstart && im[i] < rend) { 6266 row = im[i] - rstart; 6267 lastcol1 = -1; 6268 rp1 = aj + ai[row]; 6269 ap1 = aa + ai[row]; 6270 rmax1 = aimax[row]; 6271 nrow1 = ailen[row]; 6272 low1 = 0; 6273 high1 = nrow1; 6274 lastcol2 = -1; 6275 rp2 = bj + bi[row]; 6276 ap2 = ba + bi[row]; 6277 rmax2 = bimax[row]; 6278 nrow2 = bilen[row]; 6279 low2 = 0; 6280 high2 = nrow2; 6281 6282 for (j=0; j<n; j++) { 6283 if (roworiented) value = v[i*n+j]; 6284 else value = v[i+j*m]; 6285 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 6286 if (in[j] >= cstart && in[j] < cend) { 6287 col = in[j] - cstart; 6288 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 6289 #if defined(PETSC_HAVE_DEVICE) 6290 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 6291 #endif 6292 } else if (in[j] < 0) continue; 6293 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 6294 /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */ 6295 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 6296 } else { 6297 if (mat->was_assembled) { 6298 if (!aij->colmap) { 6299 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 6300 } 6301 #if defined(PETSC_USE_CTABLE) 6302 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 6303 col--; 6304 #else 6305 col = aij->colmap[in[j]] - 1; 6306 #endif 6307 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 6308 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 6309 col = in[j]; 6310 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 6311 B = aij->B; 6312 b = (Mat_SeqAIJ*)B->data; 6313 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 6314 rp2 = bj + bi[row]; 6315 ap2 = ba + bi[row]; 6316 rmax2 = bimax[row]; 6317 nrow2 = bilen[row]; 6318 low2 = 0; 6319 high2 = nrow2; 6320 bm = aij->B->rmap->n; 6321 ba = b->a; 6322 inserted = PETSC_FALSE; 6323 } 6324 } else col = in[j]; 6325 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 6326 #if defined(PETSC_HAVE_DEVICE) 6327 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 6328 #endif 6329 } 6330 } 6331 } else if (!aij->donotstash) { 6332 if (roworiented) { 6333 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6334 } else { 6335 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6336 } 6337 } 6338 } 6339 } 6340 PetscFunctionReturnVoid(); 6341 } 6342 6343 typedef struct { 6344 Mat *mp; /* intermediate products */ 6345 PetscBool *mptmp; /* is the intermediate product temporary ? 
*/ 6346 PetscInt cp; /* number of intermediate products */ 6347 6348 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6349 PetscInt *startsj_s,*startsj_r; 6350 PetscScalar *bufa; 6351 Mat P_oth; 6352 6353 /* may take advantage of merging product->B */ 6354 Mat Bloc; 6355 6356 /* cusparse does not have support to split between symbolic and numeric phases 6357 When api_user is true, we don't need to update the numerical values 6358 of the temporary storage */ 6359 PetscBool reusesym; 6360 6361 /* support for COO values insertion */ 6362 PetscScalar *coo_v,*coo_w; 6363 PetscInt **own; 6364 PetscInt **off; 6365 PetscBool hasoffproc; /* if true, non-local values insertion (i.e. AtB or PtAP) */ 6366 PetscSF sf; /* used for non-local values insertion and memory malloc */ 6367 PetscMemType mtype; 6368 6369 /* customization */ 6370 PetscBool abmerge; 6371 PetscBool P_oth_bind; 6372 } MatMatMPIAIJBACKEND; 6373 6374 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 6375 { 6376 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data; 6377 PetscInt i; 6378 PetscErrorCode ierr; 6379 6380 PetscFunctionBegin; 6381 ierr = PetscFree2(mmdata->startsj_s,mmdata->startsj_r);CHKERRQ(ierr); 6382 ierr = PetscFree(mmdata->bufa);CHKERRQ(ierr); 6383 ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v);CHKERRQ(ierr); 6384 ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w);CHKERRQ(ierr); 6385 ierr = MatDestroy(&mmdata->P_oth);CHKERRQ(ierr); 6386 ierr = MatDestroy(&mmdata->Bloc);CHKERRQ(ierr); 6387 ierr = PetscSFDestroy(&mmdata->sf);CHKERRQ(ierr); 6388 for (i = 0; i < mmdata->cp; i++) { 6389 ierr = MatDestroy(&mmdata->mp[i]);CHKERRQ(ierr); 6390 } 6391 ierr = PetscFree(mmdata->mp);CHKERRQ(ierr); 6392 ierr = PetscFree(mmdata->mptmp);CHKERRQ(ierr); 6393 ierr = PetscFree(mmdata->own[0]);CHKERRQ(ierr); 6394 ierr = PetscFree(mmdata->own);CHKERRQ(ierr); 6395 ierr = PetscFree(mmdata->off[0]);CHKERRQ(ierr); 6396 ierr = PetscFree(mmdata->off);CHKERRQ(ierr); 6397 ierr = 
PetscFree(mmdata);CHKERRQ(ierr); 6398 PetscFunctionReturn(0); 6399 } 6400 6401 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 6402 { 6403 PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]); 6404 PetscErrorCode ierr; 6405 6406 PetscFunctionBegin; 6407 ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f);CHKERRQ(ierr); 6408 if (f) { 6409 ierr = (*f)(A,n,idx,v);CHKERRQ(ierr); 6410 } else { 6411 const PetscScalar *vv; 6412 6413 ierr = MatSeqAIJGetArrayRead(A,&vv);CHKERRQ(ierr); 6414 if (n && idx) { 6415 PetscScalar *w = v; 6416 const PetscInt *oi = idx; 6417 PetscInt j; 6418 6419 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 6420 } else { 6421 ierr = PetscArraycpy(v,vv,n);CHKERRQ(ierr); 6422 } 6423 ierr = MatSeqAIJRestoreArrayRead(A,&vv);CHKERRQ(ierr); 6424 } 6425 PetscFunctionReturn(0); 6426 } 6427 6428 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 6429 { 6430 MatMatMPIAIJBACKEND *mmdata; 6431 PetscInt i,n_d,n_o; 6432 PetscErrorCode ierr; 6433 6434 PetscFunctionBegin; 6435 MatCheckProduct(C,1); 6436 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 6437 mmdata = (MatMatMPIAIJBACKEND*)C->product->data; 6438 if (!mmdata->reusesym) { /* update temporary matrices */ 6439 if (mmdata->P_oth) { 6440 ierr = MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr); 6441 } 6442 if (mmdata->Bloc) { 6443 ierr = MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc);CHKERRQ(ierr); 6444 } 6445 } 6446 mmdata->reusesym = PETSC_FALSE; 6447 6448 for (i = 0; i < mmdata->cp; i++) { 6449 if (!mmdata->mp[i]->ops->productnumeric) SETERRQ1(PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]); 6450 ierr = 
(*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]);CHKERRQ(ierr); 6451 } 6452 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 6453 PetscInt noff = mmdata->off[i+1] - mmdata->off[i]; 6454 6455 if (mmdata->mptmp[i]) continue; 6456 if (noff) { 6457 PetscInt nown = mmdata->own[i+1] - mmdata->own[i]; 6458 6459 ierr = MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o);CHKERRQ(ierr); 6460 ierr = MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d);CHKERRQ(ierr); 6461 n_o += noff; 6462 n_d += nown; 6463 } else { 6464 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data; 6465 6466 ierr = MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d);CHKERRQ(ierr); 6467 n_d += mm->nz; 6468 } 6469 } 6470 if (mmdata->hasoffproc) { /* offprocess insertion */ 6471 ierr = PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr); 6472 ierr = PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr); 6473 } 6474 ierr = MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES);CHKERRQ(ierr); 6475 PetscFunctionReturn(0); 6476 } 6477 6478 /* Support for Pt * A, A * P, or Pt * A * P */ 6479 #define MAX_NUMBER_INTERMEDIATE 4 6480 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 6481 { 6482 Mat_Product *product = C->product; 6483 Mat A,P,mp[MAX_NUMBER_INTERMEDIATE]; 6484 Mat_MPIAIJ *a,*p; 6485 MatMatMPIAIJBACKEND *mmdata; 6486 ISLocalToGlobalMapping P_oth_l2g = NULL; 6487 IS glob = NULL; 6488 const char *prefix; 6489 char pprefix[256]; 6490 const PetscInt *globidx,*P_oth_idx; 6491 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; 6492 PetscInt cp = 0,m,n,M,N,ncoo,ncoo_d,ncoo_o,ncoo_oown,*coo_i,*coo_j,cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE],i,j; 6493 MatProductType ptype; 6494 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk; 6495 PetscMPIInt size; 6496 PetscErrorCode ierr; 6497 6498 
PetscFunctionBegin; 6499 MatCheckProduct(C,1); 6500 if (product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 6501 ptype = product->type; 6502 if (product->A->symmetric && ptype == MATPRODUCT_AtB) ptype = MATPRODUCT_AB; 6503 switch (ptype) { 6504 case MATPRODUCT_AB: 6505 A = product->A; 6506 P = product->B; 6507 m = A->rmap->n; 6508 n = P->cmap->n; 6509 M = A->rmap->N; 6510 N = P->cmap->N; 6511 break; 6512 case MATPRODUCT_AtB: 6513 P = product->A; 6514 A = product->B; 6515 m = P->cmap->n; 6516 n = A->cmap->n; 6517 M = P->cmap->N; 6518 N = A->cmap->N; 6519 hasoffproc = PETSC_TRUE; 6520 break; 6521 case MATPRODUCT_PtAP: 6522 A = product->A; 6523 P = product->B; 6524 m = P->cmap->n; 6525 n = P->cmap->n; 6526 M = P->cmap->N; 6527 N = P->cmap->N; 6528 hasoffproc = PETSC_TRUE; 6529 break; 6530 default: 6531 SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 6532 } 6533 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)C),&size);CHKERRMPI(ierr); 6534 if (size == 1) hasoffproc = PETSC_FALSE; 6535 6536 /* defaults */ 6537 for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) { 6538 mp[i] = NULL; 6539 mptmp[i] = PETSC_FALSE; 6540 rmapt[i] = -1; 6541 cmapt[i] = -1; 6542 rmapa[i] = NULL; 6543 cmapa[i] = NULL; 6544 } 6545 6546 /* customization */ 6547 ierr = PetscNew(&mmdata);CHKERRQ(ierr); 6548 mmdata->reusesym = product->api_user; 6549 if (ptype == MATPRODUCT_AB) { 6550 if (product->api_user) { 6551 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");CHKERRQ(ierr); 6552 ierr = PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr); 6553 ierr = PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 6554 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6555 } else { 6556 ierr = 
PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr); 6557 ierr = PetscOptionsBool("-matproduct_ab_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr); 6558 ierr = PetscOptionsBool("-matproduct_ab_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 6559 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6560 } 6561 } else if (ptype == MATPRODUCT_PtAP) { 6562 if (product->api_user) { 6563 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");CHKERRQ(ierr); 6564 ierr = PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 6565 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6566 } else { 6567 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr); 6568 ierr = PetscOptionsBool("-matproduct_ptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 6569 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6570 } 6571 } 6572 a = (Mat_MPIAIJ*)A->data; 6573 p = (Mat_MPIAIJ*)P->data; 6574 ierr = MatSetSizes(C,m,n,M,N);CHKERRQ(ierr); 6575 ierr = PetscLayoutSetUp(C->rmap);CHKERRQ(ierr); 6576 ierr = PetscLayoutSetUp(C->cmap);CHKERRQ(ierr); 6577 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 6578 ierr = MatGetOptionsPrefix(C,&prefix);CHKERRQ(ierr); 6579 switch (ptype) { 6580 case MATPRODUCT_AB: /* A * P */ 6581 ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr); 6582 6583 if (mmdata->abmerge) { /* A_diag * P_loc and A_off * P_oth */ 6584 /* P is product->B */ 6585 ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr); 6586 ierr = 
MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6587 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6588 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6589 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6590 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6591 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6592 mp[cp]->product->api_user = product->api_user; 6593 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6594 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6595 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6596 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 6597 rmapt[cp] = 1; 6598 cmapt[cp] = 2; 6599 cmapa[cp] = globidx; 6600 mptmp[cp] = PETSC_FALSE; 6601 cp++; 6602 } else { /* A_diag * P_diag and A_diag * P_off and A_off * P_oth */ 6603 ierr = MatProductCreate(a->A,p->A,NULL,&mp[cp]);CHKERRQ(ierr); 6604 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6605 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6606 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6607 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6608 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6609 mp[cp]->product->api_user = product->api_user; 6610 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6611 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6612 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6613 rmapt[cp] = 1; 6614 cmapt[cp] = 1; 6615 mptmp[cp] = PETSC_FALSE; 6616 cp++; 6617 ierr = MatProductCreate(a->A,p->B,NULL,&mp[cp]);CHKERRQ(ierr); 6618 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6619 ierr = 
MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6620 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6621 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6622 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6623 mp[cp]->product->api_user = product->api_user; 6624 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6625 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6626 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6627 rmapt[cp] = 1; 6628 cmapt[cp] = 2; 6629 cmapa[cp] = p->garray; 6630 mptmp[cp] = PETSC_FALSE; 6631 cp++; 6632 } 6633 if (mmdata->P_oth) { 6634 ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr); 6635 ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr); 6636 ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr); 6637 ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr); 6638 ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr); 6639 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6640 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6641 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6642 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6643 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6644 mp[cp]->product->api_user = product->api_user; 6645 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6646 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6647 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6648 rmapt[cp] = 1; 6649 cmapt[cp] = 2; 6650 cmapa[cp] = P_oth_idx; 6651 mptmp[cp] = PETSC_FALSE; 6652 cp++; 6653 } 6654 break; 6655 
case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 6656 /* A is product->B */ 6657 ierr = MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr); 6658 if (A == P) { 6659 ierr = MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6660 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 6661 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6662 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6663 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6664 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6665 mp[cp]->product->api_user = product->api_user; 6666 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6667 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6668 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6669 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 6670 rmapt[cp] = 2; 6671 rmapa[cp] = globidx; 6672 cmapt[cp] = 2; 6673 cmapa[cp] = globidx; 6674 mptmp[cp] = PETSC_FALSE; 6675 cp++; 6676 } else { 6677 ierr = MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6678 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 6679 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6680 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6681 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6682 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6683 mp[cp]->product->api_user = product->api_user; 6684 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6685 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6686 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6687 ierr = 
ISGetIndices(glob,&globidx);CHKERRQ(ierr); 6688 rmapt[cp] = 1; 6689 cmapt[cp] = 2; 6690 cmapa[cp] = globidx; 6691 mptmp[cp] = PETSC_FALSE; 6692 cp++; 6693 ierr = MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6694 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 6695 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6696 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6697 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6698 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6699 mp[cp]->product->api_user = product->api_user; 6700 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6701 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6702 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6703 rmapt[cp] = 2; 6704 rmapa[cp] = p->garray; 6705 cmapt[cp] = 2; 6706 cmapa[cp] = globidx; 6707 mptmp[cp] = PETSC_FALSE; 6708 cp++; 6709 } 6710 break; 6711 case MATPRODUCT_PtAP: 6712 ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr); 6713 /* P is product->B */ 6714 ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr); 6715 ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6716 ierr = MatProductSetType(mp[cp],MATPRODUCT_PtAP);CHKERRQ(ierr); 6717 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6718 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6719 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6720 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6721 mp[cp]->product->api_user = product->api_user; 6722 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6723 if (!mp[cp]->ops->productsymbolic) 
SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6724 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6725 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 6726 rmapt[cp] = 2; 6727 rmapa[cp] = globidx; 6728 cmapt[cp] = 2; 6729 cmapa[cp] = globidx; 6730 mptmp[cp] = PETSC_FALSE; 6731 cp++; 6732 if (mmdata->P_oth) { 6733 ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr); 6734 ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr); 6735 ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr); 6736 ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr); 6737 ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr); 6738 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6739 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6740 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6741 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6742 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6743 mp[cp]->product->api_user = product->api_user; 6744 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6745 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6746 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6747 mptmp[cp] = PETSC_TRUE; 6748 cp++; 6749 ierr = MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]);CHKERRQ(ierr); 6750 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 6751 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6752 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6753 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6754 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6755 
mp[cp]->product->api_user = product->api_user; 6756 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6757 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6758 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6759 rmapt[cp] = 2; 6760 rmapa[cp] = globidx; 6761 cmapt[cp] = 2; 6762 cmapa[cp] = P_oth_idx; 6763 mptmp[cp] = PETSC_FALSE; 6764 cp++; 6765 } 6766 break; 6767 default: 6768 SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 6769 } 6770 /* sanity check */ 6771 if (size > 1) for (i = 0; i < cp; i++) if (rmapt[i] == 2 && !hasoffproc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %D",i); 6772 6773 ierr = PetscMalloc1(cp,&mmdata->mp);CHKERRQ(ierr); 6774 for (i = 0; i < cp; i++) mmdata->mp[i] = mp[i]; 6775 ierr = PetscMalloc1(cp,&mmdata->mptmp);CHKERRQ(ierr); 6776 for (i = 0; i < cp; i++) mmdata->mptmp[i] = mptmp[i]; 6777 mmdata->cp = cp; 6778 C->product->data = mmdata; 6779 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 6780 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 6781 6782 /* memory type */ 6783 mmdata->mtype = PETSC_MEMTYPE_HOST; 6784 ierr = PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");CHKERRQ(ierr); 6785 ierr = PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"");CHKERRQ(ierr); 6786 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 6787 // enable the line below MatSeqAIJCopySubArray_SeqAIJKokkos is implemented 6788 //else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_DEVICE; 6789 6790 /* prepare coo coordinates for values insertion */ 6791 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 6792 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 6793 if (mptmp[cp]) continue; 6794 if (rmapt[cp] == 2 && hasoffproc) { 6795 const 
PetscInt *rmap = rmapa[cp]; 6796 const PetscInt mr = mp[cp]->rmap->n; 6797 const PetscInt rs = C->rmap->rstart; 6798 const PetscInt re = C->rmap->rend; 6799 const PetscInt *ii = mm->i; 6800 for (i = 0; i < mr; i++) { 6801 const PetscInt gr = rmap[i]; 6802 const PetscInt nz = ii[i+1] - ii[i]; 6803 if (gr < rs || gr >= re) ncoo_o += nz; 6804 else ncoo_oown += nz; 6805 } 6806 } else ncoo_d += mm->nz; 6807 } 6808 ierr = PetscCalloc1(mmdata->cp+1,&mmdata->off);CHKERRQ(ierr); 6809 ierr = PetscCalloc1(mmdata->cp+1,&mmdata->own);CHKERRQ(ierr); 6810 if (hasoffproc) { /* handle offproc values insertion */ 6811 PetscSF msf; 6812 PetscInt ncoo2,*coo_i2,*coo_j2; 6813 6814 ierr = PetscMalloc1(ncoo_o,&mmdata->off[0]);CHKERRQ(ierr); 6815 ierr = PetscMalloc1(ncoo_oown,&mmdata->own[0]);CHKERRQ(ierr); 6816 ierr = PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j);CHKERRQ(ierr); 6817 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 6818 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 6819 PetscInt *idxoff = mmdata->off[cp]; 6820 PetscInt *idxown = mmdata->own[cp]; 6821 if (!mptmp[cp] && rmapt[cp] == 2) { 6822 const PetscInt *rmap = rmapa[cp]; 6823 const PetscInt *cmap = cmapa[cp]; 6824 const PetscInt *ii = mm->i; 6825 PetscInt *coi = coo_i + ncoo_o; 6826 PetscInt *coj = coo_j + ncoo_o; 6827 const PetscInt mr = mp[cp]->rmap->n; 6828 const PetscInt rs = C->rmap->rstart; 6829 const PetscInt re = C->rmap->rend; 6830 const PetscInt cs = C->cmap->rstart; 6831 for (i = 0; i < mr; i++) { 6832 const PetscInt *jj = mm->j + ii[i]; 6833 const PetscInt gr = rmap[i]; 6834 const PetscInt nz = ii[i+1] - ii[i]; 6835 if (gr < rs || gr >= re) { 6836 for (j = ii[i]; j < ii[i+1]; j++) { 6837 *coi++ = gr; 6838 *idxoff++ = j; 6839 } 6840 if (!cmapt[cp]) { /* already global */ 6841 for (j = 0; j < nz; j++) *coj++ = jj[j]; 6842 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 6843 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 6844 } else { /* offdiag */ 6845 for (j = 0; j < nz; j++) *coj++ = 
cmap[jj[j]]; 6846 } 6847 ncoo_o += nz; 6848 } else { 6849 for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j; 6850 } 6851 } 6852 } 6853 mmdata->off[cp + 1] = idxoff; 6854 mmdata->own[cp + 1] = idxown; 6855 } 6856 6857 ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr); 6858 ierr = PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o,NULL,PETSC_OWN_POINTER,coo_i);CHKERRQ(ierr); 6859 ierr = PetscSFGetMultiSF(mmdata->sf,&msf);CHKERRQ(ierr); 6860 ierr = PetscSFGetGraph(msf,&ncoo2,NULL,NULL,NULL);CHKERRQ(ierr); 6861 ncoo = ncoo_d + ncoo_oown + ncoo2; 6862 ierr = PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2);CHKERRQ(ierr); 6863 ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); 6864 ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); 6865 ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); 6866 ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); 6867 ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr); 6868 ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w);CHKERRQ(ierr); 6869 coo_i = coo_i2; 6870 coo_j = coo_j2; 6871 } else { /* no offproc values insertion */ 6872 ncoo = ncoo_d; 6873 ierr = PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j);CHKERRQ(ierr); 6874 6875 ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr); 6876 ierr = PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER);CHKERRQ(ierr); 6877 ierr = PetscSFSetUp(mmdata->sf);CHKERRQ(ierr); 6878 } 6879 mmdata->hasoffproc = hasoffproc; 6880 6881 /* on-process indices */ 6882 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 6883 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 6884 PetscInt *coi = coo_i + ncoo_d; 6885 PetscInt *coj = coo_j + ncoo_d; 6886 const PetscInt *jj = mm->j; 6887 const PetscInt *ii = mm->i; 6888 const PetscInt *cmap = cmapa[cp]; 6889 
const PetscInt *rmap = rmapa[cp]; 6890 const PetscInt mr = mp[cp]->rmap->n; 6891 const PetscInt rs = C->rmap->rstart; 6892 const PetscInt re = C->rmap->rend; 6893 const PetscInt cs = C->cmap->rstart; 6894 6895 if (mptmp[cp]) continue; 6896 if (rmapt[cp] == 1) { 6897 for (i = 0; i < mr; i++) { 6898 const PetscInt gr = i + rs; 6899 for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr; 6900 } 6901 /* columns coo */ 6902 if (!cmapt[cp]) { 6903 ierr = PetscArraycpy(coj,jj,mm->nz);CHKERRQ(ierr); 6904 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 6905 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; 6906 } else { /* offdiag */ 6907 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 6908 } 6909 ncoo_d += mm->nz; 6910 } else if (rmapt[cp] == 2) { 6911 for (i = 0; i < mr; i++) { 6912 const PetscInt *jj = mm->j + ii[i]; 6913 const PetscInt gr = rmap[i]; 6914 const PetscInt nz = ii[i+1] - ii[i]; 6915 if (gr >= rs && gr < re) { 6916 for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr; 6917 if (!cmapt[cp]) { /* already global */ 6918 for (j = 0; j < nz; j++) *coj++ = jj[j]; 6919 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 6920 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 6921 } else { /* offdiag */ 6922 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 6923 } 6924 ncoo_d += nz; 6925 } 6926 } 6927 } 6928 } 6929 if (glob) { 6930 ierr = ISRestoreIndices(glob,&globidx);CHKERRQ(ierr); 6931 } 6932 ierr = ISDestroy(&glob);CHKERRQ(ierr); 6933 if (P_oth_l2g) { 6934 ierr = ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr); 6935 } 6936 ierr = ISLocalToGlobalMappingDestroy(&P_oth_l2g);CHKERRQ(ierr); 6937 ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v);CHKERRQ(ierr); 6938 6939 /* preallocate with COO data */ 6940 ierr = MatSetPreallocationCOO(C,ncoo,coo_i,coo_j);CHKERRQ(ierr); 6941 ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr); 6942 PetscFunctionReturn(0); 6943 } 6944 6945 
/*
   MatProductSetFromOptions_MPIAIJBACKEND - select the symbolic routine for a product.

   Input Parameter:
.  mat - the product matrix (must carry product data, see MatCheckProduct())

   Notes:
   With device support, the backend is considered only if neither operand is
   bound to the CPU and both operands have the same type; the *_backend_cpu
   options then let the user opt out. Without device support the backend is
   always considered. It is installed for AB, AtB and PtAP; whenever no
   symbolic routine has been set afterwards, MatProductSetFromOptions_MPIAIJ()
   is called as fallback.
*/
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product    *product = mat->product;
  PetscErrorCode ierr;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool      match = PETSC_FALSE;
  PetscBool      usecpu = PETSC_FALSE;
#else
  PetscBool      match = PETSC_TRUE;
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat,1);
#if defined(PETSC_HAVE_DEVICE)
  if (!product->A->boundtocpu && !product->B->boundtocpu) {
    ierr = PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match);CHKERRQ(ierr);
  }
  if (match) { /* we can always fallback to CPU in case an operation is not performing on the device */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
        ierr = PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
        ierr = PetscOptionsEnd();CHKERRQ(ierr);
      } else {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
        ierr = PetscOptionsBool("-matproduct_ab_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
        ierr = PetscOptionsEnd();CHKERRQ(ierr);
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");CHKERRQ(ierr);
        ierr = PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
        ierr = PetscOptionsEnd();CHKERRQ(ierr);
      } else {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");CHKERRQ(ierr);
        ierr = PetscOptionsBool("-matproduct_atb_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
        ierr = PetscOptionsEnd();CHKERRQ(ierr);
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
        /* manual page is MatPtAP, matching the options block and the non-api_user branch */
        ierr = PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr);
        ierr = PetscOptionsEnd();CHKERRQ(ierr);
      } else {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
        ierr = PetscOptionsBool("-matproduct_ptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr);
        ierr = PetscOptionsEnd();CHKERRQ(ierr);
      }
      break;
    default:
      break;
    }
    /* the user may force the CPU implementation */
    match = (PetscBool)!usecpu;
  }
#endif
  if (match) {
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default:
      break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) {
    ierr = MatProductSetFromOptions_MPIAIJ(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}