1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/sfimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 /*MC 10 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 11 12 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 13 and MATMPIAIJ otherwise. As a result, for single process communicators, 14 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation() is supported 15 for communicators controlling multiple processes. It is recommended that you call both of 16 the above preallocation routines for simplicity. 17 18 Options Database Keys: 19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 20 21 Developer Notes: 22 Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when 23 enough exist. 24 25 Level: beginner 26 27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ 28 M*/ 29 30 /*MC 31 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 32 33 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 34 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 35 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 36 for communicators controlling multiple processes. It is recommended that you call both of 37 the above preallocation routines for simplicity. 38 39 Options Database Keys: 40 . 
-mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 41 42 Level: beginner 43 44 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 45 M*/ 46 47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg) 48 { 49 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 50 PetscErrorCode ierr; 51 52 PetscFunctionBegin; 53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL) 54 A->boundtocpu = flg; 55 #endif 56 if (a->A) { 57 ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr); 58 } 59 if (a->B) { 60 ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr); 61 } 62 PetscFunctionReturn(0); 63 } 64 65 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 66 { 67 PetscErrorCode ierr; 68 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 69 70 PetscFunctionBegin; 71 if (mat->A) { 72 ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr); 73 ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr); 74 } 75 PetscFunctionReturn(0); 76 } 77 78 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 79 { 80 PetscErrorCode ierr; 81 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 82 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 83 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 84 const PetscInt *ia,*ib; 85 const MatScalar *aa,*bb,*aav,*bav; 86 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 87 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 88 89 PetscFunctionBegin; 90 *keptrows = NULL; 91 92 ia = a->i; 93 ib = b->i; 94 ierr = MatSeqAIJGetArrayRead(mat->A,&aav);CHKERRQ(ierr); 95 ierr = MatSeqAIJGetArrayRead(mat->B,&bav);CHKERRQ(ierr); 96 for (i=0; i<m; i++) { 97 na = ia[i+1] - ia[i]; 98 nb = ib[i+1] - ib[i]; 99 if (!na && !nb) { 100 cnt++; 101 goto ok1; 102 } 103 aa = aav + ia[i]; 104 for (j=0; j<na; j++) { 105 if (aa[j] != 0.0) goto ok1; 106 } 107 bb = bav + ib[i]; 108 for (j=0; j <nb; j++) { 109 if (bb[j] != 0.0) goto ok1; 110 } 111 cnt++; 112 ok1:; 113 } 114 ierr = 
MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRMPI(ierr); 115 if (!n0rows) { 116 ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr); 117 ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 121 cnt = 0; 122 for (i=0; i<m; i++) { 123 na = ia[i+1] - ia[i]; 124 nb = ib[i+1] - ib[i]; 125 if (!na && !nb) continue; 126 aa = aav + ia[i]; 127 for (j=0; j<na;j++) { 128 if (aa[j] != 0.0) { 129 rows[cnt++] = rstart + i; 130 goto ok2; 131 } 132 } 133 bb = bav + ib[i]; 134 for (j=0; j<nb; j++) { 135 if (bb[j] != 0.0) { 136 rows[cnt++] = rstart + i; 137 goto ok2; 138 } 139 } 140 ok2:; 141 } 142 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 143 ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr); 144 ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr); 145 PetscFunctionReturn(0); 146 } 147 148 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 149 { 150 PetscErrorCode ierr; 151 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 152 PetscBool cong; 153 154 PetscFunctionBegin; 155 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 156 if (Y->assembled && cong) { 157 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 158 } else { 159 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 160 } 161 PetscFunctionReturn(0); 162 } 163 164 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 165 { 166 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 167 PetscErrorCode ierr; 168 PetscInt i,rstart,nrows,*rows; 169 170 PetscFunctionBegin; 171 *zrows = NULL; 172 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 173 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 174 for (i=0; i<nrows; i++) rows[i] += rstart; 175 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 176 
PetscFunctionReturn(0); 177 } 178 179 PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,ReductionType type,PetscReal *reductions) 180 { 181 PetscErrorCode ierr; 182 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 183 PetscInt i,m,n,*garray = aij->garray; 184 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 185 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 186 PetscReal *work; 187 const PetscScalar *dummy; 188 189 PetscFunctionBegin; 190 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 191 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 192 ierr = MatSeqAIJGetArrayRead(aij->A,&dummy);CHKERRQ(ierr); 193 ierr = MatSeqAIJRestoreArrayRead(aij->A,&dummy);CHKERRQ(ierr); 194 ierr = MatSeqAIJGetArrayRead(aij->B,&dummy);CHKERRQ(ierr); 195 ierr = MatSeqAIJRestoreArrayRead(aij->B,&dummy);CHKERRQ(ierr); 196 if (type == REDUCTION_NORM_2) { 197 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 198 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 199 } 200 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 201 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 202 } 203 } else if (type == REDUCTION_NORM_1) { 204 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 205 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 206 } 207 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 208 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 209 } 210 } else if (type == REDUCTION_NORM_INFINITY) { 211 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 212 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 213 } 214 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 215 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 216 } 217 } else if (type == REDUCTION_SUM || type == REDUCTION_MEAN) { 218 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 219 work[A->cmap->rstart + a_aij->j[i]] += a_aij->a[i]; 220 } 221 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 222 
work[garray[b_aij->j[i]]] += b_aij->a[i]; 223 } 224 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown ReductionType"); 225 if (type == REDUCTION_NORM_INFINITY) { 226 ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 227 } else { 228 ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 229 } 230 ierr = PetscFree(work);CHKERRQ(ierr); 231 if (type == REDUCTION_NORM_2) { 232 for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 233 } else if (type == REDUCTION_MEAN) { 234 for (i=0; i<n; i++) reductions[i] /= m; 235 } 236 PetscFunctionReturn(0); 237 } 238 239 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 240 { 241 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 242 IS sis,gis; 243 PetscErrorCode ierr; 244 const PetscInt *isis,*igis; 245 PetscInt n,*iis,nsis,ngis,rstart,i; 246 247 PetscFunctionBegin; 248 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 249 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 250 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 251 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 252 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 253 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 254 255 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 256 ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr); 257 ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr); 258 n = ngis + nsis; 259 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 260 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 261 for (i=0; i<n; i++) iis[i] += rstart; 262 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 263 264 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 265 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 266 ierr = ISDestroy(&sis);CHKERRQ(ierr); 267 ierr = ISDestroy(&gis);CHKERRQ(ierr); 268 PetscFunctionReturn(0); 269 } 270 271 /* 272 Local 
utility routine that creates a mapping from the global column 273 number to the local number in the off-diagonal part of the local 274 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 275 a slightly higher hash table cost; without it it is not scalable (each processor 276 has an order N integer array but is fast to access. 277 */ 278 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 279 { 280 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 281 PetscErrorCode ierr; 282 PetscInt n = aij->B->cmap->n,i; 283 284 PetscFunctionBegin; 285 if (n && !aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 286 #if defined(PETSC_USE_CTABLE) 287 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 288 for (i=0; i<n; i++) { 289 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 290 } 291 #else 292 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 293 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 294 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 295 #endif 296 PetscFunctionReturn(0); 297 } 298 299 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 300 { \ 301 if (col <= lastcol1) low1 = 0; \ 302 else high1 = nrow1; \ 303 lastcol1 = col;\ 304 while (high1-low1 > 5) { \ 305 t = (low1+high1)/2; \ 306 if (rp1[t] > col) high1 = t; \ 307 else low1 = t; \ 308 } \ 309 for (_i=low1; _i<high1; _i++) { \ 310 if (rp1[_i] > col) break; \ 311 if (rp1[_i] == col) { \ 312 if (addv == ADD_VALUES) { \ 313 ap1[_i] += value; \ 314 /* Not sure LogFlops will slow dow the code or not */ \ 315 (void)PetscLogFlops(1.0); \ 316 } \ 317 else ap1[_i] = value; \ 318 inserted = PETSC_TRUE; \ 319 goto a_noinsert; \ 320 } \ 321 } \ 322 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 323 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 324 
if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 325 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 326 N = nrow1++ - 1; a->nz++; high1++; \ 327 /* shift up all the later entries in this row */ \ 328 ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\ 329 ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\ 330 rp1[_i] = col; \ 331 ap1[_i] = value; \ 332 A->nonzerostate++;\ 333 a_noinsert: ; \ 334 ailen[row] = nrow1; \ 335 } 336 337 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 338 { \ 339 if (col <= lastcol2) low2 = 0; \ 340 else high2 = nrow2; \ 341 lastcol2 = col; \ 342 while (high2-low2 > 5) { \ 343 t = (low2+high2)/2; \ 344 if (rp2[t] > col) high2 = t; \ 345 else low2 = t; \ 346 } \ 347 for (_i=low2; _i<high2; _i++) { \ 348 if (rp2[_i] > col) break; \ 349 if (rp2[_i] == col) { \ 350 if (addv == ADD_VALUES) { \ 351 ap2[_i] += value; \ 352 (void)PetscLogFlops(1.0); \ 353 } \ 354 else ap2[_i] = value; \ 355 inserted = PETSC_TRUE; \ 356 goto b_noinsert; \ 357 } \ 358 } \ 359 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 360 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 361 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 362 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 363 N = nrow2++ - 1; b->nz++; high2++; \ 364 /* shift up all the later entries in this row */ \ 365 ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\ 366 ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\ 367 rp2[_i] = col; \ 368 ap2[_i] = value; \ 369 B->nonzerostate++; \ 370 b_noinsert: ; \ 371 bilen[row] = nrow2; \ 372 } 373 374 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt 
row,const PetscScalar v[]) 375 { 376 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 377 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 378 PetscErrorCode ierr; 379 PetscInt l,*garray = mat->garray,diag; 380 381 PetscFunctionBegin; 382 /* code only works for square matrices A */ 383 384 /* find size of row to the left of the diagonal part */ 385 ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr); 386 row = row - diag; 387 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 388 if (garray[b->j[b->i[row]+l]] > diag) break; 389 } 390 ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr); 391 392 /* diagonal part */ 393 ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr); 394 395 /* right of diagonal part */ 396 ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr); 397 #if defined(PETSC_HAVE_DEVICE) 398 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU; 399 #endif 400 PetscFunctionReturn(0); 401 } 402 403 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 404 { 405 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 406 PetscScalar value = 0.0; 407 PetscErrorCode ierr; 408 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 409 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 410 PetscBool roworiented = aij->roworiented; 411 412 /* Some Variables required in the macro */ 413 Mat A = aij->A; 414 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 415 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 416 PetscBool ignorezeroentries = a->ignorezeroentries; 417 Mat B = aij->B; 418 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 419 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 420 MatScalar *aa,*ba; 421 /* This variable 
below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 422 * cannot use "#if defined" inside a macro. */ 423 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 424 425 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 426 PetscInt nonew; 427 MatScalar *ap1,*ap2; 428 429 PetscFunctionBegin; 430 #if defined(PETSC_HAVE_DEVICE) 431 if (A->offloadmask == PETSC_OFFLOAD_GPU) { 432 const PetscScalar *dummy; 433 ierr = MatSeqAIJGetArrayRead(A,&dummy);CHKERRQ(ierr); 434 ierr = MatSeqAIJRestoreArrayRead(A,&dummy);CHKERRQ(ierr); 435 } 436 if (B->offloadmask == PETSC_OFFLOAD_GPU) { 437 const PetscScalar *dummy; 438 ierr = MatSeqAIJGetArrayRead(B,&dummy);CHKERRQ(ierr); 439 ierr = MatSeqAIJRestoreArrayRead(B,&dummy);CHKERRQ(ierr); 440 } 441 #endif 442 aa = a->a; 443 ba = b->a; 444 for (i=0; i<m; i++) { 445 if (im[i] < 0) continue; 446 if (PetscUnlikely(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 447 if (im[i] >= rstart && im[i] < rend) { 448 row = im[i] - rstart; 449 lastcol1 = -1; 450 rp1 = aj + ai[row]; 451 ap1 = aa + ai[row]; 452 rmax1 = aimax[row]; 453 nrow1 = ailen[row]; 454 low1 = 0; 455 high1 = nrow1; 456 lastcol2 = -1; 457 rp2 = bj + bi[row]; 458 ap2 = ba + bi[row]; 459 rmax2 = bimax[row]; 460 nrow2 = bilen[row]; 461 low2 = 0; 462 high2 = nrow2; 463 464 for (j=0; j<n; j++) { 465 if (v) value = roworiented ? 
v[i*n+j] : v[i+j*m]; 466 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 467 if (in[j] >= cstart && in[j] < cend) { 468 col = in[j] - cstart; 469 nonew = a->nonew; 470 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 471 #if defined(PETSC_HAVE_DEVICE) 472 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 473 #endif 474 } else if (in[j] < 0) continue; 475 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 476 else { 477 if (mat->was_assembled) { 478 if (!aij->colmap) { 479 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 480 } 481 #if defined(PETSC_USE_CTABLE) 482 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 483 col--; 484 #else 485 col = aij->colmap[in[j]] - 1; 486 #endif 487 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 488 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 489 col = in[j]; 490 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 491 B = aij->B; 492 b = (Mat_SeqAIJ*)B->data; 493 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 494 rp2 = bj + bi[row]; 495 ap2 = ba + bi[row]; 496 rmax2 = bimax[row]; 497 nrow2 = bilen[row]; 498 low2 = 0; 499 high2 = nrow2; 500 bm = aij->B->rmap->n; 501 ba = b->a; 502 inserted = PETSC_FALSE; 503 } else if (col < 0 && !(ignorezeroentries && value == 0.0)) { 504 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 505 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 506 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 507 } 508 } else col = in[j]; 509 nonew = b->nonew; 510 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 511 #if 
defined(PETSC_HAVE_DEVICE) 512 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 513 #endif 514 } 515 } 516 } else { 517 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 518 if (!aij->donotstash) { 519 mat->assembled = PETSC_FALSE; 520 if (roworiented) { 521 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 522 } else { 523 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 524 } 525 } 526 } 527 } 528 PetscFunctionReturn(0); 529 } 530 531 /* 532 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 533 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 534 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 
535 */ 536 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[]) 537 { 538 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 539 Mat A = aij->A; /* diagonal part of the matrix */ 540 Mat B = aij->B; /* offdiagonal part of the matrix */ 541 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 542 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 543 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,col; 544 PetscInt *ailen = a->ilen,*aj = a->j; 545 PetscInt *bilen = b->ilen,*bj = b->j; 546 PetscInt am = aij->A->rmap->n,j; 547 PetscInt diag_so_far = 0,dnz; 548 PetscInt offd_so_far = 0,onz; 549 550 PetscFunctionBegin; 551 /* Iterate over all rows of the matrix */ 552 for (j=0; j<am; j++) { 553 dnz = onz = 0; 554 /* Iterate over all non-zero columns of the current row */ 555 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 556 /* If column is in the diagonal */ 557 if (mat_j[col] >= cstart && mat_j[col] < cend) { 558 aj[diag_so_far++] = mat_j[col] - cstart; 559 dnz++; 560 } else { /* off-diagonal entries */ 561 bj[offd_so_far++] = mat_j[col]; 562 onz++; 563 } 564 } 565 ailen[j] = dnz; 566 bilen[j] = onz; 567 } 568 PetscFunctionReturn(0); 569 } 570 571 /* 572 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 573 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 574 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 575 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 576 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 
577 */ 578 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[]) 579 { 580 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 581 Mat A = aij->A; /* diagonal part of the matrix */ 582 Mat B = aij->B; /* offdiagonal part of the matrix */ 583 Mat_SeqAIJ *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data; 584 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 585 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 586 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend; 587 PetscInt *ailen = a->ilen,*aj = a->j; 588 PetscInt *bilen = b->ilen,*bj = b->j; 589 PetscInt am = aij->A->rmap->n,j; 590 PetscInt *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */ 591 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 592 PetscScalar *aa = a->a,*ba = b->a; 593 594 PetscFunctionBegin; 595 /* Iterate over all rows of the matrix */ 596 for (j=0; j<am; j++) { 597 dnz_row = onz_row = 0; 598 rowstart_offd = full_offd_i[j]; 599 rowstart_diag = full_diag_i[j]; 600 /* Iterate over all non-zero columns of the current row */ 601 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 602 /* If column is in the diagonal */ 603 if (mat_j[col] >= cstart && mat_j[col] < cend) { 604 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 605 aa[rowstart_diag+dnz_row] = mat_a[col]; 606 dnz_row++; 607 } else { /* off-diagonal entries */ 608 bj[rowstart_offd+onz_row] = mat_j[col]; 609 ba[rowstart_offd+onz_row] = mat_a[col]; 610 onz_row++; 611 } 612 } 613 ailen[j] = dnz_row; 614 bilen[j] = onz_row; 615 } 616 PetscFunctionReturn(0); 617 } 618 619 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 620 { 621 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 622 PetscErrorCode ierr; 623 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 624 PetscInt cstart = mat->cmap->rstart,cend = 
mat->cmap->rend,row,col; 625 626 PetscFunctionBegin; 627 for (i=0; i<m; i++) { 628 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 629 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 630 if (idxm[i] >= rstart && idxm[i] < rend) { 631 row = idxm[i] - rstart; 632 for (j=0; j<n; j++) { 633 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 634 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 635 if (idxn[j] >= cstart && idxn[j] < cend) { 636 col = idxn[j] - cstart; 637 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 638 } else { 639 if (!aij->colmap) { 640 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 641 } 642 #if defined(PETSC_USE_CTABLE) 643 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 644 col--; 645 #else 646 col = aij->colmap[idxn[j]] - 1; 647 #endif 648 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 649 else { 650 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 651 } 652 } 653 } 654 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 655 } 656 PetscFunctionReturn(0); 657 } 658 659 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 660 { 661 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 662 PetscErrorCode ierr; 663 PetscInt nstash,reallocs; 664 665 PetscFunctionBegin; 666 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 667 668 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 669 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 670 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 671 
PetscFunctionReturn(0); 672 } 673 674 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 675 { 676 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 677 PetscErrorCode ierr; 678 PetscMPIInt n; 679 PetscInt i,j,rstart,ncols,flg; 680 PetscInt *row,*col; 681 PetscBool other_disassembled; 682 PetscScalar *val; 683 684 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 685 686 PetscFunctionBegin; 687 if (!aij->donotstash && !mat->nooffprocentries) { 688 while (1) { 689 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 690 if (!flg) break; 691 692 for (i=0; i<n;) { 693 /* Now identify the consecutive vals belonging to the same row */ 694 for (j=i,rstart=row[j]; j<n; j++) { 695 if (row[j] != rstart) break; 696 } 697 if (j < n) ncols = j-i; 698 else ncols = n-i; 699 /* Now assemble all these values with a single function call */ 700 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 701 i = j; 702 } 703 } 704 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 705 } 706 #if defined(PETSC_HAVE_DEVICE) 707 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 708 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 709 if (mat->boundtocpu) { 710 ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr); 711 ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr); 712 } 713 #endif 714 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 715 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 716 717 /* determine if any processor has disassembled, if so we must 718 also disassemble ourself, in order that we may reassemble. 
*/ 719 /* 720 if nonzero structure of submatrix B cannot change then we know that 721 no processor disassembled thus we can skip this stuff 722 */ 723 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 724 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 725 if (mat->was_assembled && !other_disassembled) { 726 #if defined(PETSC_HAVE_DEVICE) 727 aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */ 728 #endif 729 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 730 } 731 } 732 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 733 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 734 } 735 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 736 #if defined(PETSC_HAVE_DEVICE) 737 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 738 #endif 739 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 740 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 741 742 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 743 744 aij->rowvalues = NULL; 745 746 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 747 748 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 749 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 750 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 751 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 752 } 753 #if defined(PETSC_HAVE_DEVICE) 754 mat->offloadmask = PETSC_OFFLOAD_BOTH; 755 #endif 756 PetscFunctionReturn(0); 757 } 758 759 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 760 { 761 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 762 PetscErrorCode ierr; 763 764 PetscFunctionBegin; 765 ierr = 
MatZeroEntries(l->A);CHKERRQ(ierr); 766 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 767 PetscFunctionReturn(0); 768 } 769 770 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 771 { 772 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 773 PetscObjectState sA, sB; 774 PetscInt *lrows; 775 PetscInt r, len; 776 PetscBool cong, lch, gch; 777 PetscErrorCode ierr; 778 779 PetscFunctionBegin; 780 /* get locally owned rows */ 781 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 782 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 783 /* fix right hand side if needed */ 784 if (x && b) { 785 const PetscScalar *xx; 786 PetscScalar *bb; 787 788 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 789 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 790 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 791 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 792 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 793 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 794 } 795 796 sA = mat->A->nonzerostate; 797 sB = mat->B->nonzerostate; 798 799 if (diag != 0.0 && cong) { 800 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 801 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 802 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 803 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 804 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 805 PetscInt nnwA, nnwB; 806 PetscBool nnzA, nnzB; 807 808 nnwA = aijA->nonew; 809 nnwB = aijB->nonew; 810 nnzA = aijA->keepnonzeropattern; 811 nnzB = aijB->keepnonzeropattern; 812 if (!nnzA) { 813 ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr); 814 aijA->nonew = 0; 815 } 816 if (!nnzB) { 817 ierr = 
PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr); 818 aijB->nonew = 0; 819 } 820 /* Must zero here before the next loop */ 821 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 822 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 823 for (r = 0; r < len; ++r) { 824 const PetscInt row = lrows[r] + A->rmap->rstart; 825 if (row >= A->cmap->N) continue; 826 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 827 } 828 aijA->nonew = nnwA; 829 aijB->nonew = nnwB; 830 } else { 831 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 832 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 833 } 834 ierr = PetscFree(lrows);CHKERRQ(ierr); 835 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 836 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 837 838 /* reduce nonzerostate */ 839 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 840 ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 841 if (gch) A->nonzerostate++; 842 PetscFunctionReturn(0); 843 } 844 845 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 846 { 847 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 848 PetscErrorCode ierr; 849 PetscMPIInt n = A->rmap->n; 850 PetscInt i,j,r,m,len = 0; 851 PetscInt *lrows,*owners = A->rmap->range; 852 PetscMPIInt p = 0; 853 PetscSFNode *rrows; 854 PetscSF sf; 855 const PetscScalar *xx; 856 PetscScalar *bb,*mask; 857 Vec xmask,lmask; 858 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 859 const PetscInt *aj, *ii,*ridx; 860 PetscScalar *aa; 861 862 PetscFunctionBegin; 863 /* Create SF where leaves are input rows and roots are owned rows */ 864 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 865 for (r = 0; r < n; ++r) lrows[r] = -1; 866 
ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 867 for (r = 0; r < N; ++r) { 868 const PetscInt idx = rows[r]; 869 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 870 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 871 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 872 } 873 rrows[r].rank = p; 874 rrows[r].index = rows[r] - owners[p]; 875 } 876 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 877 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 878 /* Collect flags for rows to be zeroed */ 879 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 880 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 881 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 882 /* Compress and put in row numbers */ 883 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 884 /* zero diagonal part of matrix */ 885 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 886 /* handle off diagonal part of matrix */ 887 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 888 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 889 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 890 for (i=0; i<len; i++) bb[lrows[i]] = 1; 891 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 892 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 893 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 894 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 895 if (x && b) { /* this code is buggy when the row and column layout don't match */ 896 PetscBool cong; 897 898 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 899 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 900 ierr = 
VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 901 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 902 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 903 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 904 } 905 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 906 /* remove zeroed rows of off diagonal matrix */ 907 ii = aij->i; 908 for (i=0; i<len; i++) { 909 ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr); 910 } 911 /* loop over all elements of off process part of matrix zeroing removed columns*/ 912 if (aij->compressedrow.use) { 913 m = aij->compressedrow.nrows; 914 ii = aij->compressedrow.i; 915 ridx = aij->compressedrow.rindex; 916 for (i=0; i<m; i++) { 917 n = ii[i+1] - ii[i]; 918 aj = aij->j + ii[i]; 919 aa = aij->a + ii[i]; 920 921 for (j=0; j<n; j++) { 922 if (PetscAbsScalar(mask[*aj])) { 923 if (b) bb[*ridx] -= *aa*xx[*aj]; 924 *aa = 0.0; 925 } 926 aa++; 927 aj++; 928 } 929 ridx++; 930 } 931 } else { /* do not use compressed row format */ 932 m = l->B->rmap->n; 933 for (i=0; i<m; i++) { 934 n = ii[i+1] - ii[i]; 935 aj = aij->j + ii[i]; 936 aa = aij->a + ii[i]; 937 for (j=0; j<n; j++) { 938 if (PetscAbsScalar(mask[*aj])) { 939 if (b) bb[i] -= *aa*xx[*aj]; 940 *aa = 0.0; 941 } 942 aa++; 943 aj++; 944 } 945 } 946 } 947 if (x && b) { 948 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 949 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 950 } 951 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 952 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 953 ierr = PetscFree(lrows);CHKERRQ(ierr); 954 955 /* only change matrix nonzero state if pattern was allowed to be changed */ 956 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 957 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 958 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 959 } 960 PetscFunctionReturn(0); 961 } 
962 963 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 964 { 965 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 966 PetscErrorCode ierr; 967 PetscInt nt; 968 VecScatter Mvctx = a->Mvctx; 969 970 PetscFunctionBegin; 971 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 972 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 973 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 974 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 975 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 976 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 977 PetscFunctionReturn(0); 978 } 979 980 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 981 { 982 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 983 PetscErrorCode ierr; 984 985 PetscFunctionBegin; 986 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 987 PetscFunctionReturn(0); 988 } 989 990 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 991 { 992 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 993 PetscErrorCode ierr; 994 VecScatter Mvctx = a->Mvctx; 995 996 PetscFunctionBegin; 997 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 998 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 999 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1000 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1001 PetscFunctionReturn(0); 1002 } 1003 1004 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1005 { 1006 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1007 PetscErrorCode ierr; 1008 1009 PetscFunctionBegin; 1010 /* do nondiagonal part */ 1011 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1012 /* do local part */ 1013 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1014 /* add partial results together */ 1015 ierr = 
VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1016 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1017 PetscFunctionReturn(0); 1018 } 1019 1020 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1021 { 1022 MPI_Comm comm; 1023 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1024 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1025 IS Me,Notme; 1026 PetscErrorCode ierr; 1027 PetscInt M,N,first,last,*notme,i; 1028 PetscBool lf; 1029 PetscMPIInt size; 1030 1031 PetscFunctionBegin; 1032 /* Easy test: symmetric diagonal block */ 1033 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1034 ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr); 1035 ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRMPI(ierr); 1036 if (!*f) PetscFunctionReturn(0); 1037 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1038 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 1039 if (size == 1) PetscFunctionReturn(0); 1040 1041 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. 
*/ 1042 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1043 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1044 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1045 for (i=0; i<first; i++) notme[i] = i; 1046 for (i=last; i<M; i++) notme[i-last+first] = i; 1047 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1048 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1049 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1050 Aoff = Aoffs[0]; 1051 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1052 Boff = Boffs[0]; 1053 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1054 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1055 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1056 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1057 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1058 ierr = PetscFree(notme);CHKERRQ(ierr); 1059 PetscFunctionReturn(0); 1060 } 1061 1062 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1063 { 1064 PetscErrorCode ierr; 1065 1066 PetscFunctionBegin; 1067 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1068 PetscFunctionReturn(0); 1069 } 1070 1071 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1072 { 1073 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1074 PetscErrorCode ierr; 1075 1076 PetscFunctionBegin; 1077 /* do nondiagonal part */ 1078 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1079 /* do local part */ 1080 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1081 /* add partial results together */ 1082 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1083 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1084 PetscFunctionReturn(0); 1085 } 1086 1087 /* 1088 This only works correctly for square matrices where the subblock A->A is 
the 1089 diagonal block 1090 */ 1091 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1092 { 1093 PetscErrorCode ierr; 1094 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1095 1096 PetscFunctionBegin; 1097 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1098 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1099 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1100 PetscFunctionReturn(0); 1101 } 1102 1103 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1104 { 1105 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1106 PetscErrorCode ierr; 1107 1108 PetscFunctionBegin; 1109 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1110 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1111 PetscFunctionReturn(0); 1112 } 1113 1114 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1115 { 1116 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1117 PetscErrorCode ierr; 1118 1119 PetscFunctionBegin; 1120 #if defined(PETSC_USE_LOG) 1121 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1122 #endif 1123 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1124 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1125 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1126 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1127 #if defined(PETSC_USE_CTABLE) 1128 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1129 #else 1130 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1131 #endif 1132 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1133 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1134 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1135 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1136 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1137 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1138 1139 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 1140 ierr = 
PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr); 1141 1142 ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr); 1143 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1144 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1145 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1146 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1147 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1148 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1149 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1150 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr); 1151 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1152 #if defined(PETSC_HAVE_CUDA) 1153 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL);CHKERRQ(ierr); 1154 #endif 1155 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 1156 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL);CHKERRQ(ierr); 1157 #endif 1158 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 1159 #if defined(PETSC_HAVE_ELEMENTAL) 1160 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1161 #endif 1162 #if defined(PETSC_HAVE_SCALAPACK) 1163 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr); 1164 #endif 1165 #if defined(PETSC_HAVE_HYPRE) 1166 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1167 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1168 #endif 1169 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1170 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr); 1171 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1172 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL);CHKERRQ(ierr); 1173 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL);CHKERRQ(ierr); 1174 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL);CHKERRQ(ierr); 1175 #if defined(PETSC_HAVE_MKL_SPARSE) 1176 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);CHKERRQ(ierr); 1177 #endif 1178 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);CHKERRQ(ierr); 1179 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1180 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);CHKERRQ(ierr); 1181 PetscFunctionReturn(0); 1182 } 1183 1184 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1185 { 1186 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1187 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1188 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1189 const PetscInt *garray = aij->garray; 1190 const PetscScalar *aa,*ba; 1191 PetscInt header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb; 1192 PetscInt *rowlens; 1193 PetscInt *colidxs; 1194 PetscScalar *matvals; 1195 PetscErrorCode ierr; 1196 1197 PetscFunctionBegin; 1198 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1199 1200 M = mat->rmap->N; 1201 N = mat->cmap->N; 1202 m = mat->rmap->n; 1203 rs = mat->rmap->rstart; 1204 cs = mat->cmap->rstart; 1205 nz = 
A->nz + B->nz; 1206 1207 /* write matrix header */ 1208 header[0] = MAT_FILE_CLASSID; 1209 header[1] = M; header[2] = N; header[3] = nz; 1210 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1211 ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 1212 1213 /* fill in and store row lengths */ 1214 ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1215 for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1216 ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr); 1217 ierr = PetscFree(rowlens);CHKERRQ(ierr); 1218 1219 /* fill in and store column indices */ 1220 ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1221 for (cnt=0, i=0; i<m; i++) { 1222 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1223 if (garray[B->j[jb]] > cs) break; 1224 colidxs[cnt++] = garray[B->j[jb]]; 1225 } 1226 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1227 colidxs[cnt++] = A->j[ja] + cs; 1228 for (; jb<B->i[i+1]; jb++) 1229 colidxs[cnt++] = garray[B->j[jb]]; 1230 } 1231 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1232 ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 1233 ierr = PetscFree(colidxs);CHKERRQ(ierr); 1234 1235 /* fill in and store nonzero values */ 1236 ierr = MatSeqAIJGetArrayRead(aij->A,&aa);CHKERRQ(ierr); 1237 ierr = MatSeqAIJGetArrayRead(aij->B,&ba);CHKERRQ(ierr); 1238 ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1239 for (cnt=0, i=0; i<m; i++) { 1240 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1241 if (garray[B->j[jb]] > cs) break; 1242 matvals[cnt++] = ba[jb]; 1243 } 1244 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1245 matvals[cnt++] = aa[ja]; 1246 for (; jb<B->i[i+1]; jb++) 1247 matvals[cnt++] = ba[jb]; 1248 } 1249 ierr = MatSeqAIJRestoreArrayRead(aij->A,&aa);CHKERRQ(ierr); 1250 ierr = MatSeqAIJRestoreArrayRead(aij->B,&ba);CHKERRQ(ierr); 1251 if (cnt != nz) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1252 ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 1253 ierr = PetscFree(matvals);CHKERRQ(ierr); 1254 1255 /* write block size option to the viewer's .info file */ 1256 ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 1257 PetscFunctionReturn(0); 1258 } 1259 1260 #include <petscdraw.h> 1261 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1262 { 1263 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1264 PetscErrorCode ierr; 1265 PetscMPIInt rank = aij->rank,size = aij->size; 1266 PetscBool isdraw,iascii,isbinary; 1267 PetscViewer sviewer; 1268 PetscViewerFormat format; 1269 1270 PetscFunctionBegin; 1271 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1272 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1273 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1274 if (iascii) { 1275 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1276 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1277 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1278 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1279 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1280 for (i=0; i<(PetscInt)size; i++) { 1281 nmax = PetscMax(nmax,nz[i]); 1282 nmin = PetscMin(nmin,nz[i]); 1283 navg += nz[i]; 1284 } 1285 ierr = PetscFree(nz);CHKERRQ(ierr); 1286 navg = navg/size; 1287 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1288 PetscFunctionReturn(0); 1289 } 1290 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1291 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1292 
MatInfo info; 1293 PetscInt *inodes=NULL; 1294 1295 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr); 1296 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1297 ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr); 1298 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1299 if (!inodes) { 1300 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1301 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1302 } else { 1303 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1304 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1305 } 1306 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1307 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1308 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1309 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1310 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1311 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1312 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1313 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1314 PetscFunctionReturn(0); 1315 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1316 PetscInt inodecount,inodelimit,*inodes; 1317 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1318 if (inodes) { 1319 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1320 } else { 1321 ierr = PetscViewerASCIIPrintf(viewer,"not using 
I-node (on process 0) routines\n");CHKERRQ(ierr); 1322 } 1323 PetscFunctionReturn(0); 1324 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1325 PetscFunctionReturn(0); 1326 } 1327 } else if (isbinary) { 1328 if (size == 1) { 1329 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1330 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1331 } else { 1332 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1333 } 1334 PetscFunctionReturn(0); 1335 } else if (iascii && size == 1) { 1336 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1337 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1338 PetscFunctionReturn(0); 1339 } else if (isdraw) { 1340 PetscDraw draw; 1341 PetscBool isnull; 1342 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1343 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1344 if (isnull) PetscFunctionReturn(0); 1345 } 1346 1347 { /* assemble the entire matrix onto first processor */ 1348 Mat A = NULL, Av; 1349 IS isrow,iscol; 1350 1351 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1352 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1353 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1354 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1355 /* The commented code uses MatCreateSubMatrices instead */ 1356 /* 1357 Mat *AA, A = NULL, Av; 1358 IS isrow,iscol; 1359 1360 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1361 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? 
mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1362 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1363 if (!rank) { 1364 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1365 A = AA[0]; 1366 Av = AA[0]; 1367 } 1368 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1369 */ 1370 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1371 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1372 /* 1373 Everyone has to call to draw the matrix since the graphics waits are 1374 synchronized across all processors that share the PetscDraw object 1375 */ 1376 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1377 if (!rank) { 1378 if (((PetscObject)mat)->name) { 1379 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1380 } 1381 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1382 } 1383 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1384 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1385 ierr = MatDestroy(&A);CHKERRQ(ierr); 1386 } 1387 PetscFunctionReturn(0); 1388 } 1389 1390 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1391 { 1392 PetscErrorCode ierr; 1393 PetscBool iascii,isdraw,issocket,isbinary; 1394 1395 PetscFunctionBegin; 1396 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1397 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1398 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1399 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1400 if (iascii || isdraw || isbinary || issocket) { 1401 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1402 } 1403 PetscFunctionReturn(0); 1404 } 1405 1406 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1407 { 1408 Mat_MPIAIJ 
*mat = (Mat_MPIAIJ*)matin->data; 1409 PetscErrorCode ierr; 1410 Vec bb1 = NULL; 1411 PetscBool hasop; 1412 1413 PetscFunctionBegin; 1414 if (flag == SOR_APPLY_UPPER) { 1415 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1416 PetscFunctionReturn(0); 1417 } 1418 1419 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1420 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1421 } 1422 1423 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1424 if (flag & SOR_ZERO_INITIAL_GUESS) { 1425 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1426 its--; 1427 } 1428 1429 while (its--) { 1430 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1431 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1432 1433 /* update rhs: bb1 = bb - B*x */ 1434 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1435 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1436 1437 /* local sweep */ 1438 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1439 } 1440 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1441 if (flag & SOR_ZERO_INITIAL_GUESS) { 1442 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1443 its--; 1444 } 1445 while (its--) { 1446 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1447 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1448 1449 /* update rhs: bb1 = bb - B*x */ 1450 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1451 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1452 1453 /* local sweep */ 1454 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1455 } 1456 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1457 if (flag & SOR_ZERO_INITIAL_GUESS) { 
1458 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1459 its--; 1460 } 1461 while (its--) { 1462 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1463 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1464 1465 /* update rhs: bb1 = bb - B*x */ 1466 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1467 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1468 1469 /* local sweep */ 1470 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1471 } 1472 } else if (flag & SOR_EISENSTAT) { 1473 Vec xx1; 1474 1475 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1476 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1477 1478 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1479 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1480 if (!mat->diag) { 1481 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1482 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1483 } 1484 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1485 if (hasop) { 1486 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1487 } else { 1488 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1489 } 1490 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1491 1492 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1493 1494 /* local sweep */ 1495 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1496 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1497 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1498 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1499 1500 ierr = 
VecDestroy(&bb1);CHKERRQ(ierr); 1501 1502 matin->factorerrortype = mat->A->factorerrortype; 1503 PetscFunctionReturn(0); 1504 } 1505 1506 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1507 { 1508 Mat aA,aB,Aperm; 1509 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1510 PetscScalar *aa,*ba; 1511 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1512 PetscSF rowsf,sf; 1513 IS parcolp = NULL; 1514 PetscBool done; 1515 PetscErrorCode ierr; 1516 1517 PetscFunctionBegin; 1518 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1519 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1520 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1521 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1522 1523 /* Invert row permutation to find out where my rows should go */ 1524 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1525 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1526 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1527 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1528 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1529 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1530 1531 /* Invert column permutation to find out where my columns should go */ 1532 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1533 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1534 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1535 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1536 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1537 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1538 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1539 1540 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1541 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 
1542 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1543 1544 /* Find out where my gcols should go */ 1545 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1546 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1547 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1548 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1549 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1550 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr); 1551 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr); 1552 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1553 1554 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1555 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1556 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1557 for (i=0; i<m; i++) { 1558 PetscInt row = rdest[i]; 1559 PetscMPIInt rowner; 1560 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1561 for (j=ai[i]; j<ai[i+1]; j++) { 1562 PetscInt col = cdest[aj[j]]; 1563 PetscMPIInt cowner; 1564 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1565 if (rowner == cowner) dnnz[i]++; 1566 else onnz[i]++; 1567 } 1568 for (j=bi[i]; j<bi[i+1]; j++) { 1569 PetscInt col = gcdest[bj[j]]; 1570 PetscMPIInt cowner; 1571 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1572 if (rowner == cowner) dnnz[i]++; 1573 else onnz[i]++; 1574 } 1575 } 1576 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr); 1577 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr); 1578 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr); 1579 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr); 1580 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1581 1582 ierr = 
MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1583 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1584 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1585 for (i=0; i<m; i++) { 1586 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1587 PetscInt j0,rowlen; 1588 rowlen = ai[i+1] - ai[i]; 1589 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1590 for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1591 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1592 } 1593 rowlen = bi[i+1] - bi[i]; 1594 for (j0=j=0; j<rowlen; j0=j) { 1595 for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1596 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1597 } 1598 } 1599 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1600 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1601 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1602 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1603 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1604 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1605 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1606 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1607 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1608 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1609 *B = Aperm; 1610 PetscFunctionReturn(0); 1611 } 1612 1613 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1614 { 1615 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1616 PetscErrorCode ierr; 1617 1618 PetscFunctionBegin; 1619 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1620 if (ghosts) *ghosts = aij->garray; 1621 PetscFunctionReturn(0); 1622 } 1623 1624 
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1625 { 1626 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1627 Mat A = mat->A,B = mat->B; 1628 PetscErrorCode ierr; 1629 PetscLogDouble isend[5],irecv[5]; 1630 1631 PetscFunctionBegin; 1632 info->block_size = 1.0; 1633 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1634 1635 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1636 isend[3] = info->memory; isend[4] = info->mallocs; 1637 1638 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1639 1640 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1641 isend[3] += info->memory; isend[4] += info->mallocs; 1642 if (flag == MAT_LOCAL) { 1643 info->nz_used = isend[0]; 1644 info->nz_allocated = isend[1]; 1645 info->nz_unneeded = isend[2]; 1646 info->memory = isend[3]; 1647 info->mallocs = isend[4]; 1648 } else if (flag == MAT_GLOBAL_MAX) { 1649 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr); 1650 1651 info->nz_used = irecv[0]; 1652 info->nz_allocated = irecv[1]; 1653 info->nz_unneeded = irecv[2]; 1654 info->memory = irecv[3]; 1655 info->mallocs = irecv[4]; 1656 } else if (flag == MAT_GLOBAL_SUM) { 1657 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr); 1658 1659 info->nz_used = irecv[0]; 1660 info->nz_allocated = irecv[1]; 1661 info->nz_unneeded = irecv[2]; 1662 info->memory = irecv[3]; 1663 info->mallocs = irecv[4]; 1664 } 1665 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1666 info->fill_ratio_needed = 0; 1667 info->factor_mallocs = 0; 1668 PetscFunctionReturn(0); 1669 } 1670 1671 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1672 { 1673 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1674 PetscErrorCode ierr; 1675 1676 PetscFunctionBegin; 1677 switch (op) { 1678 case MAT_NEW_NONZERO_LOCATIONS: 1679 case 
MAT_NEW_NONZERO_ALLOCATION_ERR: 1680 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1681 case MAT_KEEP_NONZERO_PATTERN: 1682 case MAT_NEW_NONZERO_LOCATION_ERR: 1683 case MAT_USE_INODES: 1684 case MAT_IGNORE_ZERO_ENTRIES: 1685 case MAT_FORM_EXPLICIT_TRANSPOSE: 1686 MatCheckPreallocated(A,1); 1687 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1688 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1689 break; 1690 case MAT_ROW_ORIENTED: 1691 MatCheckPreallocated(A,1); 1692 a->roworiented = flg; 1693 1694 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1695 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1696 break; 1697 case MAT_FORCE_DIAGONAL_ENTRIES: 1698 case MAT_SORTED_FULL: 1699 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1700 break; 1701 case MAT_IGNORE_OFF_PROC_ENTRIES: 1702 a->donotstash = flg; 1703 break; 1704 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1705 case MAT_SPD: 1706 case MAT_SYMMETRIC: 1707 case MAT_STRUCTURALLY_SYMMETRIC: 1708 case MAT_HERMITIAN: 1709 case MAT_SYMMETRY_ETERNAL: 1710 break; 1711 case MAT_SUBMAT_SINGLEIS: 1712 A->submat_singleis = flg; 1713 break; 1714 case MAT_STRUCTURE_ONLY: 1715 /* The option is handled directly by MatSetOption() */ 1716 break; 1717 default: 1718 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1719 } 1720 PetscFunctionReturn(0); 1721 } 1722 1723 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1724 { 1725 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1726 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1727 PetscErrorCode ierr; 1728 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1729 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1730 PetscInt *cmap,*idx_p; 1731 1732 PetscFunctionBegin; 1733 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1734 mat->getrowactive = PETSC_TRUE; 1735 
1736 if (!mat->rowvalues && (idx || v)) { 1737 /* 1738 allocate enough space to hold information from the longest row. 1739 */ 1740 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1741 PetscInt max = 1,tmp; 1742 for (i=0; i<matin->rmap->n; i++) { 1743 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1744 if (max < tmp) max = tmp; 1745 } 1746 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1747 } 1748 1749 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1750 lrow = row - rstart; 1751 1752 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1753 if (!v) {pvA = NULL; pvB = NULL;} 1754 if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1755 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1756 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1757 nztot = nzA + nzB; 1758 1759 cmap = mat->garray; 1760 if (v || idx) { 1761 if (nztot) { 1762 /* Sort by increasing column numbers, assuming A and B already sorted */ 1763 PetscInt imark = -1; 1764 if (v) { 1765 *v = v_p = mat->rowvalues; 1766 for (i=0; i<nzB; i++) { 1767 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1768 else break; 1769 } 1770 imark = i; 1771 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1772 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1773 } 1774 if (idx) { 1775 *idx = idx_p = mat->rowindices; 1776 if (imark > -1) { 1777 for (i=0; i<imark; i++) { 1778 idx_p[i] = cmap[cworkB[i]]; 1779 } 1780 } else { 1781 for (i=0; i<nzB; i++) { 1782 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1783 else break; 1784 } 1785 imark = i; 1786 } 1787 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1788 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1789 } 1790 } else { 1791 if (idx) *idx = NULL; 1792 if (v) *v = NULL; 1793 } 1794 } 1795 *nz = nztot; 1796 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 
1797 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1798 PetscFunctionReturn(0); 1799 } 1800 1801 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1802 { 1803 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1804 1805 PetscFunctionBegin; 1806 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1807 aij->getrowactive = PETSC_FALSE; 1808 PetscFunctionReturn(0); 1809 } 1810 1811 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1812 { 1813 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1814 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1815 PetscErrorCode ierr; 1816 PetscInt i,j,cstart = mat->cmap->rstart; 1817 PetscReal sum = 0.0; 1818 MatScalar *v; 1819 1820 PetscFunctionBegin; 1821 if (aij->size == 1) { 1822 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1823 } else { 1824 if (type == NORM_FROBENIUS) { 1825 v = amat->a; 1826 for (i=0; i<amat->nz; i++) { 1827 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1828 } 1829 v = bmat->a; 1830 for (i=0; i<bmat->nz; i++) { 1831 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1832 } 1833 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1834 *norm = PetscSqrtReal(*norm); 1835 ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr); 1836 } else if (type == NORM_1) { /* max column norm */ 1837 PetscReal *tmp,*tmp2; 1838 PetscInt *jj,*garray = aij->garray; 1839 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1840 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1841 *norm = 0.0; 1842 v = amat->a; jj = amat->j; 1843 for (j=0; j<amat->nz; j++) { 1844 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1845 } 1846 v = bmat->a; jj = bmat->j; 1847 for (j=0; j<bmat->nz; j++) { 1848 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1849 } 1850 ierr = 
MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1851 for (j=0; j<mat->cmap->N; j++) { 1852 if (tmp2[j] > *norm) *norm = tmp2[j]; 1853 } 1854 ierr = PetscFree(tmp);CHKERRQ(ierr); 1855 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1856 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1857 } else if (type == NORM_INFINITY) { /* max row norm */ 1858 PetscReal ntemp = 0.0; 1859 for (j=0; j<aij->A->rmap->n; j++) { 1860 v = amat->a + amat->i[j]; 1861 sum = 0.0; 1862 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1863 sum += PetscAbsScalar(*v); v++; 1864 } 1865 v = bmat->a + bmat->i[j]; 1866 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1867 sum += PetscAbsScalar(*v); v++; 1868 } 1869 if (sum > ntemp) ntemp = sum; 1870 } 1871 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1872 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1873 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1874 } 1875 PetscFunctionReturn(0); 1876 } 1877 1878 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1879 { 1880 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 1881 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 1882 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 1883 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 1884 PetscErrorCode ierr; 1885 Mat B,A_diag,*B_diag; 1886 const MatScalar *pbv,*bv; 1887 1888 PetscFunctionBegin; 1889 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1890 ai = Aloc->i; aj = Aloc->j; 1891 bi = Bloc->i; bj = Bloc->j; 1892 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1893 PetscInt *d_nnz,*g_nnz,*o_nnz; 1894 PetscSFNode *oloc; 1895 PETSC_UNUSED PetscSF sf; 1896 1897 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1898 /* compute 
d_nnz for preallocation */
    /* each entry of A's diagonal block in local column c becomes an entry of row c of the transpose */
    ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) {
      d_nnz[aj[i]]++;
    }
    /* compute local off-diagonal contributions */
    ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    /* the counts per compressed column are summed into o_nnz through a star forest built on garray */
    ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
    ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
    ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

    /* create the result with swapped row/column sizes and block sizes, and the type of A */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
  } else {
    /* reuse the caller's matrix; new allocations are turned into errors */
    B    = *matout;
    ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }

  b           = (Mat_MPIAIJ*)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i=0; i<A_diag_ncol; i++) {
    B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
  }

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. 
*/
  ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);

  /* copy over the B part */
  ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(a->B,&bv);CHKERRQ(ierr);
  pbv  = bv;
  row  = A->rmap->rstart;
  /* translate the compressed column numbers of a->B into global numbers */
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  /* each local row of a->B is inserted as a column (global index 'row') of B */
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    pbv += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(a->B,&bv);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* any other reuse value: merge the new matrix into A's header */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   MatDiagonalScale_MPIAIJ - Scales the matrix on the left by ll and/or on the right by rr.

   Input Parameters:
+  mat - the matrix
.  ll  - (optional) left scaling vector; local size must equal the local row size
-  rr  - (optional) right scaling vector; local size must equal the local column size

   Notes:
   The scatter of rr into aij->lvec is started before, and completed after, the scaling of the
   diagonal block; the off-diagonal block is right-scaled with aij->lvec once the scatter ends.
*/
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a    = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* left-scale the off-diagonal block; its right scaling happens after the scatter completes */
    ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   MatSetUnfactored_MPIAIJ - Calls MatSetUnfactored() on the diagonal block only.
*/
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatEqual_MPIAIJ - Sets *flag to the logical AND, over A's communicator, of the local
   comparisons of the diagonal blocks and (only if those are equal) the off-diagonal blocks.
*/
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  /* every rank takes part in the reduction, whatever its local result */
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 2037 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2038 /* because of the column compression in the off-processor part of the matrix a->B, 2039 the number of columns in a->B and b->B may be different, hence we cannot call 2040 the MatCopy() directly on the two parts. If need be, we can provide a more 2041 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2042 then copying the submatrices */ 2043 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2044 } else { 2045 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2046 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2047 } 2048 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2049 PetscFunctionReturn(0); 2050 } 2051 2052 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2053 { 2054 PetscErrorCode ierr; 2055 2056 PetscFunctionBegin; 2057 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr); 2058 PetscFunctionReturn(0); 2059 } 2060 2061 /* 2062 Computes the number of nonzeros per row needed for preallocation when X and Y 2063 have different nonzero structure. 
2064 */ 2065 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2066 { 2067 PetscInt i,j,k,nzx,nzy; 2068 2069 PetscFunctionBegin; 2070 /* Set the number of nonzeros in the new matrix */ 2071 for (i=0; i<m; i++) { 2072 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2073 nzx = xi[i+1] - xi[i]; 2074 nzy = yi[i+1] - yi[i]; 2075 nnz[i] = 0; 2076 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2077 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2078 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2079 nnz[i]++; 2080 } 2081 for (; k<nzy; k++) nnz[i]++; 2082 } 2083 PetscFunctionReturn(0); 2084 } 2085 2086 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2087 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2088 { 2089 PetscErrorCode ierr; 2090 PetscInt m = Y->rmap->N; 2091 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2092 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2093 2094 PetscFunctionBegin; 2095 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2096 PetscFunctionReturn(0); 2097 } 2098 2099 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2100 { 2101 PetscErrorCode ierr; 2102 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2103 2104 PetscFunctionBegin; 2105 if (str == SAME_NONZERO_PATTERN) { 2106 ierr = MatAXPY(yy->A,a,xx->A,str);CHKERRQ(ierr); 2107 ierr = MatAXPY(yy->B,a,xx->B,str);CHKERRQ(ierr); 2108 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2109 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2110 } else { 2111 Mat B; 2112 PetscInt *nnz_d,*nnz_o; 2113 2114 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2115 ierr = 
PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2116 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2117 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2118 ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr); 2119 ierr = MatSetType(B,((PetscObject)Y)->type_name);CHKERRQ(ierr); 2120 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2121 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2122 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2123 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2124 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2125 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2126 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2127 } 2128 PetscFunctionReturn(0); 2129 } 2130 2131 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2132 2133 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2134 { 2135 #if defined(PETSC_USE_COMPLEX) 2136 PetscErrorCode ierr; 2137 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2138 2139 PetscFunctionBegin; 2140 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2141 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2142 #else 2143 PetscFunctionBegin; 2144 #endif 2145 PetscFunctionReturn(0); 2146 } 2147 2148 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2149 { 2150 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2151 PetscErrorCode ierr; 2152 2153 PetscFunctionBegin; 2154 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2155 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2156 PetscFunctionReturn(0); 2157 } 2158 2159 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2160 { 2161 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2162 PetscErrorCode ierr; 2163 2164 PetscFunctionBegin; 2165 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2166 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2167 PetscFunctionReturn(0); 2168 } 2169 2170 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2171 { 2172 Mat_MPIAIJ *a = 
(Mat_MPIAIJ*)A->data; 2173 PetscErrorCode ierr; 2174 PetscInt i,*idxb = NULL,m = A->rmap->n; 2175 PetscScalar *va,*vv; 2176 Vec vB,vA; 2177 const PetscScalar *vb; 2178 2179 PetscFunctionBegin; 2180 ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr); 2181 ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr); 2182 2183 ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr); 2184 if (idx) { 2185 for (i=0; i<m; i++) { 2186 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2187 } 2188 } 2189 2190 ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr); 2191 ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr); 2192 ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr); 2193 2194 ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr); 2195 ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr); 2196 for (i=0; i<m; i++) { 2197 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2198 vv[i] = vb[i]; 2199 if (idx) idx[i] = a->garray[idxb[i]]; 2200 } else { 2201 vv[i] = va[i]; 2202 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) 2203 idx[i] = a->garray[idxb[i]]; 2204 } 2205 } 2206 ierr = VecRestoreArrayWrite(vA,&vv);CHKERRQ(ierr); 2207 ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr); 2208 ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr); 2209 ierr = PetscFree(idxb);CHKERRQ(ierr); 2210 ierr = VecDestroy(&vA);CHKERRQ(ierr); 2211 ierr = VecDestroy(&vB);CHKERRQ(ierr); 2212 PetscFunctionReturn(0); 2213 } 2214 2215 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2216 { 2217 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2218 PetscInt m = A->rmap->n,n = A->cmap->n; 2219 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2220 PetscInt *cmap = mat->garray; 2221 PetscInt *diagIdx, *offdiagIdx; 2222 Vec diagV, offdiagV; 2223 PetscScalar *a, *diagA, *offdiagA; 2224 const PetscScalar *ba,*bav; 2225 PetscInt r,j,col,ncols,*bi,*bj; 2226 PetscErrorCode ierr; 2227 Mat B = mat->B; 2228 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2229 2230 
PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* wrap v's storage in a sequential vector so the diagonal block writes its result directly into v */
    ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
    ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr);
    ierr = VecDestroy(&diagV);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* no local columns: report 0.0 with column index -1 for every local row */
    if (m) {
      ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
      for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
      ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  }

  ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr);

  /* Get offdiagIdx[] for implicit 0.0 */
  ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
  ba   = bav;
  bi   = b->i;
  bj   = b->j;
  ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  /* ba and bj are advanced entry by entry at the end of each iteration, so inside the loop
     they point at the first stored entry of row r */
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so it has an implicit 0.0 and the minimum absolute value is 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the stored entries of the row; a strictly smaller absolute value replaces the candidate */
    for (j=0; j<ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* merge the diagonal and off-diagonal candidates; on a tie the smaller global column is reported */
  ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r]   = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatGetRowMin_MPIAIJ - For every local row, stores in v the entry with the smallest real part
   and, if idx is given, its global column index.

   Input Parameter:
.  A - the matrix

   Output Parameters:
+  v   - vector receiving one value per local row
-  idx - (optional, may be NULL) global column of the selected entry for each local row

   Notes:
   If the local column count equals the global column count, the result is taken from the
   diagonal block alone. If there are no local columns, every local row gets PETSC_MAX_REAL
   and index -1. Otherwise the minima of the diagonal and off-diagonal blocks are merged;
   unstored off-diagonal entries count as 0.0, and on a tie the smaller global column is kept.
*/
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat   = (Mat_MPIAIJ*) A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  PetscErrorCode    ierr;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* wrap v's storage in a sequential vector so the diagonal block writes its result directly into v */
    ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
    ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr);
    ierr = VecDestroy(&diagV);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (n == 0) {
    if (m) {
      ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
      for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
      ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  }

  ierr = PetscCalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);

  /* Get offdiagIdx[] for implicit 0.0 */
  ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
  ba   = bav;
  bi   = b->i;
  bj   = b->j;
  ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  /* ba and bj are advanced entry by entry at the end of each iteration, so inside the loop
     they point at the first stored entry of row r */
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so it has an implicit 0.0 and the minimum is 0.0 or lower */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the stored entries of the row; a strictly smaller real part replaces the candidate */
    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* merge the diagonal and off-diagonal candidates; on a tie the smaller global column is reported */
  ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r]   = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r]   = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  PetscErrorCode    ierr;
  Mat               B = 
mat->B; 2446 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2447 2448 PetscFunctionBegin; 2449 /* When a process holds entire A and other processes have no entry */ 2450 if (A->cmap->N == n) { 2451 ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2452 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2453 ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr); 2454 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2455 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2456 PetscFunctionReturn(0); 2457 } else if (n == 0) { 2458 if (m) { 2459 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2460 for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;} 2461 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2462 } 2463 PetscFunctionReturn(0); 2464 } 2465 2466 ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2467 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2468 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2469 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2470 2471 /* Get offdiagIdx[] for implicit 0.0 */ 2472 ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr); 2473 ba = bav; 2474 bi = b->i; 2475 bj = b->j; 2476 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2477 for (r = 0; r < m; r++) { 2478 ncols = bi[r+1] - bi[r]; 2479 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2480 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2481 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2482 offdiagA[r] = 0.0; 2483 2484 /* Find first hole in the cmap */ 2485 for (j=0; j<ncols; j++) { 2486 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2487 if (col > j && j < cstart) { 2488 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2489 break; 2490 } else if (col > j + n && j >= cstart) { 2491 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2492 break; 2493 } 2494 } 2495 if (j == ncols && ncols < A->cmap->N - n) 
{ 2496 /* a hole is outside compressed Bcols */ 2497 if (ncols == 0) { 2498 if (cstart) { 2499 offdiagIdx[r] = 0; 2500 } else offdiagIdx[r] = cend; 2501 } else { /* ncols > 0 */ 2502 offdiagIdx[r] = cmap[ncols-1] + 1; 2503 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2504 } 2505 } 2506 } 2507 2508 for (j=0; j<ncols; j++) { 2509 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2510 ba++; bj++; 2511 } 2512 } 2513 2514 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2515 ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr); 2516 for (r = 0; r < m; ++r) { 2517 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2518 a[r] = diagA[r]; 2519 if (idx) idx[r] = cstart + diagIdx[r]; 2520 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2521 a[r] = diagA[r]; 2522 if (idx) { 2523 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2524 idx[r] = cstart + diagIdx[r]; 2525 } else idx[r] = offdiagIdx[r]; 2526 } 2527 } else { 2528 a[r] = offdiagA[r]; 2529 if (idx) idx[r] = offdiagIdx[r]; 2530 } 2531 } 2532 ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr); 2533 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2534 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2535 ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr); 2536 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2537 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2538 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2539 PetscFunctionReturn(0); 2540 } 2541 2542 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2543 { 2544 PetscErrorCode ierr; 2545 Mat *dummy; 2546 2547 PetscFunctionBegin; 2548 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2549 *newmat = *dummy; 2550 ierr = PetscFree(dummy);CHKERRQ(ierr); 2551 PetscFunctionReturn(0); 2552 } 2553 2554 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar 
**values) 2555 { 2556 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2557 PetscErrorCode ierr; 2558 2559 PetscFunctionBegin; 2560 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2561 A->factorerrortype = a->A->factorerrortype; 2562 PetscFunctionReturn(0); 2563 } 2564 2565 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2566 { 2567 PetscErrorCode ierr; 2568 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2569 2570 PetscFunctionBegin; 2571 if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2572 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2573 if (x->assembled) { 2574 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2575 } else { 2576 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2577 } 2578 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2579 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2580 PetscFunctionReturn(0); 2581 } 2582 2583 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2584 { 2585 PetscFunctionBegin; 2586 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2587 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2588 PetscFunctionReturn(0); 2589 } 2590 2591 /*@ 2592 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2593 2594 Collective on Mat 2595 2596 Input Parameters: 2597 + A - the matrix 2598 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2599 2600 Level: advanced 2601 2602 @*/ 2603 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2604 { 2605 PetscErrorCode ierr; 2606 2607 PetscFunctionBegin; 2608 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2609 
PetscFunctionReturn(0); 2610 } 2611 2612 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2613 { 2614 PetscErrorCode ierr; 2615 PetscBool sc = PETSC_FALSE,flg; 2616 2617 PetscFunctionBegin; 2618 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2619 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2620 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2621 if (flg) { 2622 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2623 } 2624 ierr = PetscOptionsTail();CHKERRQ(ierr); 2625 PetscFunctionReturn(0); 2626 } 2627 2628 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2629 { 2630 PetscErrorCode ierr; 2631 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2632 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2633 2634 PetscFunctionBegin; 2635 if (!Y->preallocated) { 2636 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2637 } else if (!aij->nz) { 2638 PetscInt nonew = aij->nonew; 2639 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2640 aij->nonew = nonew; 2641 } 2642 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2643 PetscFunctionReturn(0); 2644 } 2645 2646 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2647 { 2648 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2649 PetscErrorCode ierr; 2650 2651 PetscFunctionBegin; 2652 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2653 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2654 if (d) { 2655 PetscInt rstart; 2656 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2657 *d += rstart; 2658 2659 } 2660 PetscFunctionReturn(0); 2661 } 2662 2663 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2664 { 2665 Mat_MPIAIJ *a = 
(Mat_MPIAIJ*)A->data; 2666 PetscErrorCode ierr; 2667 2668 PetscFunctionBegin; 2669 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2670 PetscFunctionReturn(0); 2671 } 2672 2673 /* -------------------------------------------------------------------*/ 2674 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2675 MatGetRow_MPIAIJ, 2676 MatRestoreRow_MPIAIJ, 2677 MatMult_MPIAIJ, 2678 /* 4*/ MatMultAdd_MPIAIJ, 2679 MatMultTranspose_MPIAIJ, 2680 MatMultTransposeAdd_MPIAIJ, 2681 NULL, 2682 NULL, 2683 NULL, 2684 /*10*/ NULL, 2685 NULL, 2686 NULL, 2687 MatSOR_MPIAIJ, 2688 MatTranspose_MPIAIJ, 2689 /*15*/ MatGetInfo_MPIAIJ, 2690 MatEqual_MPIAIJ, 2691 MatGetDiagonal_MPIAIJ, 2692 MatDiagonalScale_MPIAIJ, 2693 MatNorm_MPIAIJ, 2694 /*20*/ MatAssemblyBegin_MPIAIJ, 2695 MatAssemblyEnd_MPIAIJ, 2696 MatSetOption_MPIAIJ, 2697 MatZeroEntries_MPIAIJ, 2698 /*24*/ MatZeroRows_MPIAIJ, 2699 NULL, 2700 NULL, 2701 NULL, 2702 NULL, 2703 /*29*/ MatSetUp_MPIAIJ, 2704 NULL, 2705 NULL, 2706 MatGetDiagonalBlock_MPIAIJ, 2707 NULL, 2708 /*34*/ MatDuplicate_MPIAIJ, 2709 NULL, 2710 NULL, 2711 NULL, 2712 NULL, 2713 /*39*/ MatAXPY_MPIAIJ, 2714 MatCreateSubMatrices_MPIAIJ, 2715 MatIncreaseOverlap_MPIAIJ, 2716 MatGetValues_MPIAIJ, 2717 MatCopy_MPIAIJ, 2718 /*44*/ MatGetRowMax_MPIAIJ, 2719 MatScale_MPIAIJ, 2720 MatShift_MPIAIJ, 2721 MatDiagonalSet_MPIAIJ, 2722 MatZeroRowsColumns_MPIAIJ, 2723 /*49*/ MatSetRandom_MPIAIJ, 2724 NULL, 2725 NULL, 2726 NULL, 2727 NULL, 2728 /*54*/ MatFDColoringCreate_MPIXAIJ, 2729 NULL, 2730 MatSetUnfactored_MPIAIJ, 2731 MatPermute_MPIAIJ, 2732 NULL, 2733 /*59*/ MatCreateSubMatrix_MPIAIJ, 2734 MatDestroy_MPIAIJ, 2735 MatView_MPIAIJ, 2736 NULL, 2737 NULL, 2738 /*64*/ NULL, 2739 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2740 NULL, 2741 NULL, 2742 NULL, 2743 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2744 MatGetRowMinAbs_MPIAIJ, 2745 NULL, 2746 NULL, 2747 NULL, 2748 NULL, 2749 /*75*/ MatFDColoringApply_AIJ, 2750 MatSetFromOptions_MPIAIJ, 2751 NULL, 2752 
NULL, 2753 MatFindZeroDiagonals_MPIAIJ, 2754 /*80*/ NULL, 2755 NULL, 2756 NULL, 2757 /*83*/ MatLoad_MPIAIJ, 2758 MatIsSymmetric_MPIAIJ, 2759 NULL, 2760 NULL, 2761 NULL, 2762 NULL, 2763 /*89*/ NULL, 2764 NULL, 2765 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2766 NULL, 2767 NULL, 2768 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2769 NULL, 2770 NULL, 2771 NULL, 2772 MatBindToCPU_MPIAIJ, 2773 /*99*/ MatProductSetFromOptions_MPIAIJ, 2774 NULL, 2775 NULL, 2776 MatConjugate_MPIAIJ, 2777 NULL, 2778 /*104*/MatSetValuesRow_MPIAIJ, 2779 MatRealPart_MPIAIJ, 2780 MatImaginaryPart_MPIAIJ, 2781 NULL, 2782 NULL, 2783 /*109*/NULL, 2784 NULL, 2785 MatGetRowMin_MPIAIJ, 2786 NULL, 2787 MatMissingDiagonal_MPIAIJ, 2788 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2789 NULL, 2790 MatGetGhosts_MPIAIJ, 2791 NULL, 2792 NULL, 2793 /*119*/MatMultDiagonalBlock_MPIAIJ, 2794 NULL, 2795 NULL, 2796 NULL, 2797 MatGetMultiProcBlock_MPIAIJ, 2798 /*124*/MatFindNonzeroRows_MPIAIJ, 2799 MatGetColumnReductions_MPIAIJ, 2800 MatInvertBlockDiagonal_MPIAIJ, 2801 MatInvertVariableBlockDiagonal_MPIAIJ, 2802 MatCreateSubMatricesMPI_MPIAIJ, 2803 /*129*/NULL, 2804 NULL, 2805 NULL, 2806 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2807 NULL, 2808 /*134*/NULL, 2809 NULL, 2810 NULL, 2811 NULL, 2812 NULL, 2813 /*139*/MatSetBlockSizes_MPIAIJ, 2814 NULL, 2815 NULL, 2816 MatFDColoringSetUp_MPIXAIJ, 2817 MatFindOffBlockDiagonalEntries_MPIAIJ, 2818 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2819 /*145*/NULL, 2820 NULL, 2821 NULL 2822 }; 2823 2824 /* ----------------------------------------------------------------------------------------*/ 2825 2826 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2827 { 2828 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2829 PetscErrorCode ierr; 2830 2831 PetscFunctionBegin; 2832 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2833 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2834 PetscFunctionReturn(0); 2835 } 2836 2837 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2838 { 2839 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 
2840 PetscErrorCode ierr; 2841 2842 PetscFunctionBegin; 2843 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2844 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2845 PetscFunctionReturn(0); 2846 } 2847 2848 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2849 { 2850 Mat_MPIAIJ *b; 2851 PetscErrorCode ierr; 2852 PetscMPIInt size; 2853 2854 PetscFunctionBegin; 2855 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2856 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2857 b = (Mat_MPIAIJ*)B->data; 2858 2859 #if defined(PETSC_USE_CTABLE) 2860 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2861 #else 2862 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2863 #endif 2864 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2865 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2866 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2867 2868 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2869 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr); 2870 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2871 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2872 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0);CHKERRQ(ierr); 2873 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2874 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2875 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2876 2877 if (!B->preallocated) { 2878 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2879 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2880 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2881 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2882 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2883 } 2884 2885 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2886 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2887 B->preallocated = PETSC_TRUE; 2888 B->was_assembled = PETSC_FALSE; 2889 B->assembled = PETSC_FALSE; 2890 PetscFunctionReturn(0); 2891 } 2892 2893 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2894 { 2895 Mat_MPIAIJ *b; 2896 PetscErrorCode ierr; 2897 2898 PetscFunctionBegin; 2899 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2900 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2901 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2902 b = (Mat_MPIAIJ*)B->data; 2903 2904 #if defined(PETSC_USE_CTABLE) 2905 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2906 #else 2907 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2908 #endif 2909 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2910 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2911 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2912 2913 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2914 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2915 B->preallocated = PETSC_TRUE; 2916 B->was_assembled = PETSC_FALSE; 2917 B->assembled = PETSC_FALSE; 2918 PetscFunctionReturn(0); 2919 } 2920 2921 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2922 { 2923 Mat mat; 2924 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2925 PetscErrorCode 
ierr; 2926 2927 PetscFunctionBegin; 2928 *newmat = NULL; 2929 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2930 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2931 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2932 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2933 a = (Mat_MPIAIJ*)mat->data; 2934 2935 mat->factortype = matin->factortype; 2936 mat->assembled = matin->assembled; 2937 mat->insertmode = NOT_SET_VALUES; 2938 mat->preallocated = matin->preallocated; 2939 2940 a->size = oldmat->size; 2941 a->rank = oldmat->rank; 2942 a->donotstash = oldmat->donotstash; 2943 a->roworiented = oldmat->roworiented; 2944 a->rowindices = NULL; 2945 a->rowvalues = NULL; 2946 a->getrowactive = PETSC_FALSE; 2947 2948 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2949 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2950 2951 if (oldmat->colmap) { 2952 #if defined(PETSC_USE_CTABLE) 2953 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2954 #else 2955 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2956 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2957 ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr); 2958 #endif 2959 } else a->colmap = NULL; 2960 if (oldmat->garray) { 2961 PetscInt len; 2962 len = oldmat->B->cmap->n; 2963 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2964 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2965 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); } 2966 } else a->garray = NULL; 2967 2968 /* It may happen MatDuplicate is called with a non-assembled matrix 2969 In fact, MatDuplicate only requires the matrix to be preallocated 2970 This may happen inside a DMCreateMatrix_Shell */ 2971 if (oldmat->lvec) { 2972 ierr = 
VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2973 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2974 } 2975 if (oldmat->Mvctx) { 2976 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2977 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2978 } 2979 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2980 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2981 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2982 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2983 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2984 *newmat = mat; 2985 PetscFunctionReturn(0); 2986 } 2987 2988 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2989 { 2990 PetscBool isbinary, ishdf5; 2991 PetscErrorCode ierr; 2992 2993 PetscFunctionBegin; 2994 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2995 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2996 /* force binary viewer to load .info file if it has not yet done so */ 2997 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2998 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2999 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 3000 if (isbinary) { 3001 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 3002 } else if (ishdf5) { 3003 #if defined(PETSC_HAVE_HDF5) 3004 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 3005 #else 3006 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3007 #endif 3008 } else { 3009 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 3010 } 3011 
PetscFunctionReturn(0); 3012 } 3013 3014 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3015 { 3016 PetscInt header[4],M,N,m,nz,rows,cols,sum,i; 3017 PetscInt *rowidxs,*colidxs; 3018 PetscScalar *matvals; 3019 PetscErrorCode ierr; 3020 3021 PetscFunctionBegin; 3022 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3023 3024 /* read in matrix header */ 3025 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3026 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3027 M = header[1]; N = header[2]; nz = header[3]; 3028 if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 3029 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N); 3030 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 3031 3032 /* set block sizes from the viewer's .info file */ 3033 ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 3034 /* set global sizes if not set already */ 3035 if (mat->rmap->N < 0) mat->rmap->N = M; 3036 if (mat->cmap->N < 0) mat->cmap->N = N; 3037 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 3038 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 3039 3040 /* check if the matrix sizes are correct */ 3041 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3042 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 3043 3044 /* read in row lengths and build row indices */ 3045 ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr); 3046 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 3047 ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr); 3048 rowidxs[0] = 0; 
for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3049 ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRMPI(ierr); 3050 if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 3051 /* read in column indices and matrix values */ 3052 ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr); 3053 ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 3054 ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 3055 /* store matrix indices and values */ 3056 ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr); 3057 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 3058 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 3059 PetscFunctionReturn(0); 3060 } 3061 3062 /* Not scalable because of ISAllGather() unless getting all columns. 
*/ 3063 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3064 { 3065 PetscErrorCode ierr; 3066 IS iscol_local; 3067 PetscBool isstride; 3068 PetscMPIInt lisstride=0,gisstride; 3069 3070 PetscFunctionBegin; 3071 /* check if we are grabbing all columns*/ 3072 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3073 3074 if (isstride) { 3075 PetscInt start,len,mstart,mlen; 3076 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3077 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3078 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3079 if (mstart == start && mlen-mstart == len) lisstride = 1; 3080 } 3081 3082 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 3083 if (gisstride) { 3084 PetscInt N; 3085 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3086 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr); 3087 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3088 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3089 } else { 3090 PetscInt cbs; 3091 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3092 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3093 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3094 } 3095 3096 *isseq = iscol_local; 3097 PetscFunctionReturn(0); 3098 } 3099 3100 /* 3101 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3102 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3103 3104 Input Parameters: 3105 mat - matrix 3106 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3107 i.e., mat->rstart <= isrow[i] < mat->rend 3108 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3109 i.e., mat->cstart <= iscol[i] < mat->cend 3110 Output Parameter: 3111 isrow_d,iscol_d - sequential row and column 
index sets for retrieving mat->A 3112 iscol_o - sequential column index set for retrieving mat->B 3113 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3114 */ 3115 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3116 { 3117 PetscErrorCode ierr; 3118 Vec x,cmap; 3119 const PetscInt *is_idx; 3120 PetscScalar *xarray,*cmaparray; 3121 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3122 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3123 Mat B=a->B; 3124 Vec lvec=a->lvec,lcmap; 3125 PetscInt i,cstart,cend,Bn=B->cmap->N; 3126 MPI_Comm comm; 3127 VecScatter Mvctx=a->Mvctx; 3128 3129 PetscFunctionBegin; 3130 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3131 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3132 3133 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3134 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3135 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3136 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3137 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3138 3139 /* Get start indices */ 3140 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3141 isstart -= ncols; 3142 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3143 3144 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3145 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3146 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3147 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3148 for (i=0; i<ncols; i++) { 3149 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3150 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3151 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3152 } 3153 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3154 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3155 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3156 3157 /* Get iscol_d */ 3158 
ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3159 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3160 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3161 3162 /* Get isrow_d */ 3163 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3164 rstart = mat->rmap->rstart; 3165 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3166 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3167 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3168 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3169 3170 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3171 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3172 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3173 3174 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3175 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3176 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3177 3178 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3179 3180 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3181 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3182 3183 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3184 /* off-process column indices */ 3185 count = 0; 3186 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3187 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3188 3189 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3190 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3191 for (i=0; i<Bn; i++) { 3192 if (PetscRealPart(xarray[i]) > -1.0) { 3193 idx[count] = i; /* local column index in off-diagonal part B */ 3194 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3195 count++; 3196 } 3197 } 3198 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3199 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3200 3201 ierr = 
ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3202 /* cannot ensure iscol_o has same blocksize as iscol! */ 3203 3204 ierr = PetscFree(idx);CHKERRQ(ierr); 3205 *garray = cmap1; 3206 3207 ierr = VecDestroy(&x);CHKERRQ(ierr); 3208 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3209 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3210 PetscFunctionReturn(0); 3211 } 3212 3213 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3214 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3215 { 3216 PetscErrorCode ierr; 3217 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3218 Mat M = NULL; 3219 MPI_Comm comm; 3220 IS iscol_d,isrow_d,iscol_o; 3221 Mat Asub = NULL,Bsub = NULL; 3222 PetscInt n; 3223 3224 PetscFunctionBegin; 3225 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3226 3227 if (call == MAT_REUSE_MATRIX) { 3228 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3229 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3230 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3231 3232 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3233 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3234 3235 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3236 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3237 3238 /* Update diagonal and off-diagonal portions of submat */ 3239 asub = (Mat_MPIAIJ*)(*submat)->data; 3240 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3241 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3242 if (n) { 
3243 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3244 } 3245 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3246 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3247 3248 } else { /* call == MAT_INITIAL_MATRIX) */ 3249 const PetscInt *garray; 3250 PetscInt BsubN; 3251 3252 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */ 3253 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3254 3255 /* Create local submatrices Asub and Bsub */ 3256 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3257 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3258 3259 /* Create submatrix M */ 3260 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3261 3262 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3263 asub = (Mat_MPIAIJ*)M->data; 3264 3265 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3266 n = asub->B->cmap->N; 3267 if (BsubN > n) { 3268 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3269 const PetscInt *idx; 3270 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3271 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3272 3273 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3274 j = 0; 3275 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3276 for (i=0; i<n; i++) { 3277 if (j >= BsubN) break; 3278 while (subgarray[i] > garray[j]) j++; 3279 3280 if (subgarray[i] == garray[j]) { 3281 idx_new[i] = idx[j++]; 3282 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3283 } 3284 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3285 3286 
ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3287 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3288 3289 } else if (BsubN < n) { 3290 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3291 } 3292 3293 ierr = PetscFree(garray);CHKERRQ(ierr); 3294 *submat = M; 3295 3296 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3297 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3298 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3299 3300 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3301 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3302 3303 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3304 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3305 } 3306 PetscFunctionReturn(0); 3307 } 3308 3309 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3310 { 3311 PetscErrorCode ierr; 3312 IS iscol_local=NULL,isrow_d; 3313 PetscInt csize; 3314 PetscInt n,i,j,start,end; 3315 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3316 MPI_Comm comm; 3317 3318 PetscFunctionBegin; 3319 /* If isrow has same processor distribution as mat, 3320 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3321 if (call == MAT_REUSE_MATRIX) { 3322 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3323 if (isrow_d) { 3324 sameRowDist = PETSC_TRUE; 3325 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3326 } else { 3327 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3328 if (iscol_local) { 3329 sameRowDist = PETSC_TRUE; 3330 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3331 } 3332 } 3333 } else { 3334 /* Check if isrow has same processor distribution as mat */ 3335 
sameDist[0] = PETSC_FALSE; 3336 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3337 if (!n) { 3338 sameDist[0] = PETSC_TRUE; 3339 } else { 3340 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3341 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3342 if (i >= start && j < end) { 3343 sameDist[0] = PETSC_TRUE; 3344 } 3345 } 3346 3347 /* Check if iscol has same processor distribution as mat */ 3348 sameDist[1] = PETSC_FALSE; 3349 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3350 if (!n) { 3351 sameDist[1] = PETSC_TRUE; 3352 } else { 3353 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3354 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3355 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3356 } 3357 3358 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3359 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRMPI(ierr); 3360 sameRowDist = tsameDist[0]; 3361 } 3362 3363 if (sameRowDist) { 3364 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3365 /* isrow and iscol have same processor distribution as mat */ 3366 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3367 PetscFunctionReturn(0); 3368 } else { /* sameRowDist */ 3369 /* isrow has same processor distribution as mat */ 3370 if (call == MAT_INITIAL_MATRIX) { 3371 PetscBool sorted; 3372 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3373 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3374 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3375 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3376 3377 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3378 if (sorted) { 3379 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3380 ierr = 
MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3381 PetscFunctionReturn(0); 3382 } 3383 } else { /* call == MAT_REUSE_MATRIX */ 3384 IS iscol_sub; 3385 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3386 if (iscol_sub) { 3387 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3388 PetscFunctionReturn(0); 3389 } 3390 } 3391 } 3392 } 3393 3394 /* General case: iscol -> iscol_local which has global size of iscol */ 3395 if (call == MAT_REUSE_MATRIX) { 3396 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3397 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3398 } else { 3399 if (!iscol_local) { 3400 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3401 } 3402 } 3403 3404 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3405 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3406 3407 if (call == MAT_INITIAL_MATRIX) { 3408 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3409 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3410 } 3411 PetscFunctionReturn(0); 3412 } 3413 3414 /*@C 3415 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3416 and "off-diagonal" part of the matrix in CSR format. 3417 3418 Collective 3419 3420 Input Parameters: 3421 + comm - MPI communicator 3422 . A - "diagonal" portion of matrix 3423 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3424 - garray - global index of B columns 3425 3426 Output Parameter: 3427 . 
mat - the matrix, with input A as its local diagonal matrix 3428 Level: advanced 3429 3430 Notes: 3431 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3432 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3433 3434 .seealso: MatCreateMPIAIJWithSplitArrays() 3435 @*/ 3436 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3437 { 3438 PetscErrorCode ierr; 3439 Mat_MPIAIJ *maij; 3440 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3441 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3442 const PetscScalar *oa; 3443 Mat Bnew; 3444 PetscInt m,n,N; 3445 3446 PetscFunctionBegin; 3447 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3448 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3449 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3450 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3451 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3452 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3453 3454 /* Get global columns of mat */ 3455 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3456 3457 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3458 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3459 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3460 maij = (Mat_MPIAIJ*)(*mat)->data; 3461 3462 (*mat)->preallocated = PETSC_TRUE; 3463 3464 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3465 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3466 3467 /* Set A as diagonal portion of *mat */ 3468 maij->A = A; 3469 3470 nz = oi[m]; 3471 for (i=0; i<nz; i++) { 3472 col = oj[i]; 3473 
oj[i] = garray[col]; 3474 } 3475 3476 /* Set Bnew as off-diagonal portion of *mat */ 3477 ierr = MatSeqAIJGetArrayRead(B,&oa);CHKERRQ(ierr); 3478 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew);CHKERRQ(ierr); 3479 ierr = MatSeqAIJRestoreArrayRead(B,&oa);CHKERRQ(ierr); 3480 bnew = (Mat_SeqAIJ*)Bnew->data; 3481 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3482 maij->B = Bnew; 3483 3484 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3485 3486 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3487 b->free_a = PETSC_FALSE; 3488 b->free_ij = PETSC_FALSE; 3489 ierr = MatDestroy(&B);CHKERRQ(ierr); 3490 3491 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3492 bnew->free_a = PETSC_TRUE; 3493 bnew->free_ij = PETSC_TRUE; 3494 3495 /* condense columns of maij->B */ 3496 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3497 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3498 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3499 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3500 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3501 PetscFunctionReturn(0); 3502 } 3503 3504 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3505 3506 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3507 { 3508 PetscErrorCode ierr; 3509 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3510 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3511 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3512 Mat M,Msub,B=a->B; 3513 MatScalar *aa; 3514 Mat_SeqAIJ *aij; 3515 PetscInt *garray = a->garray,*colsub,Ncols; 3516 PetscInt 
count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3517 IS iscol_sub,iscmap; 3518 const PetscInt *is_idx,*cmap; 3519 PetscBool allcolumns=PETSC_FALSE; 3520 MPI_Comm comm; 3521 3522 PetscFunctionBegin; 3523 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3524 if (call == MAT_REUSE_MATRIX) { 3525 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3526 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3527 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3528 3529 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3530 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3531 3532 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3533 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3534 3535 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3536 3537 } else { /* call == MAT_INITIAL_MATRIX) */ 3538 PetscBool flg; 3539 3540 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3541 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3542 3543 /* (1) iscol -> nonscalable iscol_local */ 3544 /* Check for special case: each processor gets entire matrix columns */ 3545 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3546 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3547 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 3548 if (allcolumns) { 3549 iscol_sub = iscol_local; 3550 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3551 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3552 3553 } else { 3554 /* (2) iscol_local -> 
iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3555 PetscInt *idx,*cmap1,k; 3556 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3557 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3558 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3559 count = 0; 3560 k = 0; 3561 for (i=0; i<Ncols; i++) { 3562 j = is_idx[i]; 3563 if (j >= cstart && j < cend) { 3564 /* diagonal part of mat */ 3565 idx[count] = j; 3566 cmap1[count++] = i; /* column index in submat */ 3567 } else if (Bn) { 3568 /* off-diagonal part of mat */ 3569 if (j == garray[k]) { 3570 idx[count] = j; 3571 cmap1[count++] = i; /* column index in submat */ 3572 } else if (j > garray[k]) { 3573 while (j > garray[k] && k < Bn-1) k++; 3574 if (j == garray[k]) { 3575 idx[count] = j; 3576 cmap1[count++] = i; /* column index in submat */ 3577 } 3578 } 3579 } 3580 } 3581 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3582 3583 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3584 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3585 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3586 3587 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3588 } 3589 3590 /* (3) Create sequential Msub */ 3591 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3592 } 3593 3594 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3595 aij = (Mat_SeqAIJ*)(Msub)->data; 3596 ii = aij->i; 3597 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3598 3599 /* 3600 m - number of local rows 3601 Ncols - number of columns (same on all processors) 3602 rstart - first row in new global matrix generated 3603 */ 3604 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3605 3606 if (call == MAT_INITIAL_MATRIX) { 3607 /* (4) Create parallel newmat */ 3608 PetscMPIInt rank,size; 3609 
PetscInt csize; 3610 3611 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 3612 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 3613 3614 /* 3615 Determine the number of non-zeros in the diagonal and off-diagonal 3616 portions of the matrix in order to do correct preallocation 3617 */ 3618 3619 /* first get start and end of "diagonal" columns */ 3620 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3621 if (csize == PETSC_DECIDE) { 3622 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3623 if (mglobal == Ncols) { /* square matrix */ 3624 nlocal = m; 3625 } else { 3626 nlocal = Ncols/size + ((Ncols % size) > rank); 3627 } 3628 } else { 3629 nlocal = csize; 3630 } 3631 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3632 rstart = rend - nlocal; 3633 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3634 3635 /* next, compute all the lengths */ 3636 jj = aij->j; 3637 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3638 olens = dlens + m; 3639 for (i=0; i<m; i++) { 3640 jend = ii[i+1] - ii[i]; 3641 olen = 0; 3642 dlen = 0; 3643 for (j=0; j<jend; j++) { 3644 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3645 else dlen++; 3646 jj++; 3647 } 3648 olens[i] = olen; 3649 dlens[i] = dlen; 3650 } 3651 3652 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3653 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3654 3655 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3656 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 3657 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3658 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3659 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3660 ierr = PetscFree(dlens);CHKERRQ(ierr); 3661 3662 } else { /* call == MAT_REUSE_MATRIX */ 3663 M = *newmat; 3664 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3665 if (i != m) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3666 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3667 /* 3668 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3669 rather than the slower MatSetValues(). 3670 */ 3671 M->was_assembled = PETSC_TRUE; 3672 M->assembled = PETSC_FALSE; 3673 } 3674 3675 /* (5) Set values of Msub to *newmat */ 3676 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3677 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3678 3679 jj = aij->j; 3680 ierr = MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr); 3681 for (i=0; i<m; i++) { 3682 row = rstart + i; 3683 nz = ii[i+1] - ii[i]; 3684 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3685 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3686 jj += nz; aa += nz; 3687 } 3688 ierr = MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr); 3689 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3690 3691 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3692 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3693 3694 ierr = PetscFree(colsub);CHKERRQ(ierr); 3695 3696 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3697 if (call == MAT_INITIAL_MATRIX) { 3698 *newmat = M; 3699 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3700 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3701 3702 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3703 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3704 3705 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3706 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3707 3708 if (iscol_local) { 3709 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3710 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3711 } 
3712 } 3713 PetscFunctionReturn(0); 3714 } 3715 3716 /* 3717 Not great since it makes two copies of the submatrix, first an SeqAIJ 3718 in local and then by concatenating the local matrices the end result. 3719 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3720 3721 Note: This requires a sequential iscol with all indices. 3722 */ 3723 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3724 { 3725 PetscErrorCode ierr; 3726 PetscMPIInt rank,size; 3727 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3728 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3729 Mat M,Mreuse; 3730 MatScalar *aa,*vwork; 3731 MPI_Comm comm; 3732 Mat_SeqAIJ *aij; 3733 PetscBool colflag,allcolumns=PETSC_FALSE; 3734 3735 PetscFunctionBegin; 3736 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3737 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 3738 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 3739 3740 /* Check for special case: each processor gets entire matrix columns */ 3741 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3742 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3743 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3744 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 3745 3746 if (call == MAT_REUSE_MATRIX) { 3747 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3748 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3749 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3750 } else { 3751 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3752 } 3753 3754 /* 3755 m - number of local rows 3756 n - number of 
columns (same on all processors) 3757 rstart - first row in new global matrix generated 3758 */ 3759 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3760 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3761 if (call == MAT_INITIAL_MATRIX) { 3762 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3763 ii = aij->i; 3764 jj = aij->j; 3765 3766 /* 3767 Determine the number of non-zeros in the diagonal and off-diagonal 3768 portions of the matrix in order to do correct preallocation 3769 */ 3770 3771 /* first get start and end of "diagonal" columns */ 3772 if (csize == PETSC_DECIDE) { 3773 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3774 if (mglobal == n) { /* square matrix */ 3775 nlocal = m; 3776 } else { 3777 nlocal = n/size + ((n % size) > rank); 3778 } 3779 } else { 3780 nlocal = csize; 3781 } 3782 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3783 rstart = rend - nlocal; 3784 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3785 3786 /* next, compute all the lengths */ 3787 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3788 olens = dlens + m; 3789 for (i=0; i<m; i++) { 3790 jend = ii[i+1] - ii[i]; 3791 olen = 0; 3792 dlen = 0; 3793 for (j=0; j<jend; j++) { 3794 if (*jj < rstart || *jj >= rend) olen++; 3795 else dlen++; 3796 jj++; 3797 } 3798 olens[i] = olen; 3799 dlens[i] = dlen; 3800 } 3801 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3802 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3803 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3804 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3805 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3806 ierr = PetscFree(dlens);CHKERRQ(ierr); 3807 } else { 3808 PetscInt ml,nl; 3809 3810 M = *newmat; 3811 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3812 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same 
size/layout as request"); 3813 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3814 /* 3815 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3816 rather than the slower MatSetValues(). 3817 */ 3818 M->was_assembled = PETSC_TRUE; 3819 M->assembled = PETSC_FALSE; 3820 } 3821 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3822 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3823 ii = aij->i; 3824 jj = aij->j; 3825 3826 /* trigger copy to CPU if needed */ 3827 ierr = MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr); 3828 for (i=0; i<m; i++) { 3829 row = rstart + i; 3830 nz = ii[i+1] - ii[i]; 3831 cwork = jj; jj += nz; 3832 vwork = aa; aa += nz; 3833 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3834 } 3835 ierr = MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr); 3836 3837 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3838 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3839 *newmat = M; 3840 3841 /* save submatrix used in processor for next request */ 3842 if (call == MAT_INITIAL_MATRIX) { 3843 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3844 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3845 } 3846 PetscFunctionReturn(0); 3847 } 3848 3849 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3850 { 3851 PetscInt m,cstart, cend,j,nnz,i,d; 3852 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3853 const PetscInt *JJ; 3854 PetscErrorCode ierr; 3855 PetscBool nooffprocentries; 3856 3857 PetscFunctionBegin; 3858 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3859 3860 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3861 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3862 m = B->rmap->n; 3863 cstart = B->cmap->rstart; 3864 cend = B->cmap->rend; 3865 rstart = B->rmap->rstart; 3866 3867 ierr = 
PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3868 3869 if (PetscDefined(USE_DEBUG)) { 3870 for (i=0; i<m; i++) { 3871 nnz = Ii[i+1]- Ii[i]; 3872 JJ = J + Ii[i]; 3873 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3874 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3875 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3876 } 3877 } 3878 3879 for (i=0; i<m; i++) { 3880 nnz = Ii[i+1]- Ii[i]; 3881 JJ = J + Ii[i]; 3882 nnz_max = PetscMax(nnz_max,nnz); 3883 d = 0; 3884 for (j=0; j<nnz; j++) { 3885 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3886 } 3887 d_nnz[i] = d; 3888 o_nnz[i] = nnz - d; 3889 } 3890 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3891 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3892 3893 for (i=0; i<m; i++) { 3894 ii = i + rstart; 3895 ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr); 3896 } 3897 nooffprocentries = B->nooffprocentries; 3898 B->nooffprocentries = PETSC_TRUE; 3899 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3900 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3901 B->nooffprocentries = nooffprocentries; 3902 3903 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3904 PetscFunctionReturn(0); 3905 } 3906 3907 /*@ 3908 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3909 (the default parallel PETSc format). 3910 3911 Collective 3912 3913 Input Parameters: 3914 + B - the matrix 3915 . i - the indices into j for the start of each local row (starts with zero) 3916 . 
j - the column indices for each local row (starts with zero) 3917 - v - optional values in the matrix 3918 3919 Level: developer 3920 3921 Notes: 3922 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3923 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3924 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3925 3926 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3927 3928 The format which is used for the sparse matrix input, is equivalent to a 3929 row-major ordering.. i.e for the following matrix, the input data expected is 3930 as shown 3931 3932 $ 1 0 0 3933 $ 2 0 3 P0 3934 $ ------- 3935 $ 4 5 6 P1 3936 $ 3937 $ Process0 [P0]: rows_owned=[0,1] 3938 $ i = {0,1,3} [size = nrow+1 = 2+1] 3939 $ j = {0,0,2} [size = 3] 3940 $ v = {1,2,3} [size = 3] 3941 $ 3942 $ Process1 [P1]: rows_owned=[2] 3943 $ i = {0,3} [size = nrow+1 = 1+1] 3944 $ j = {0,1,2} [size = 3] 3945 $ v = {4,5,6} [size = 3] 3946 3947 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3948 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3949 @*/ 3950 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3951 { 3952 PetscErrorCode ierr; 3953 3954 PetscFunctionBegin; 3955 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3956 PetscFunctionReturn(0); 3957 } 3958 3959 /*@C 3960 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3961 (the default parallel PETSc format). For good matrix assembly performance 3962 the user should preallocate the matrix storage by setting the parameters 3963 d_nz (or d_nnz) and o_nz (or o_nnz). 
   By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  B - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           For matrices that will be factored, you must leave room for (and set)
           the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)), is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.
   See Users-Manual: ch_mat for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.
4082 4083 Level: intermediate 4084 4085 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4086 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 4087 @*/ 4088 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4089 { 4090 PetscErrorCode ierr; 4091 4092 PetscFunctionBegin; 4093 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4094 PetscValidType(B,1); 4095 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4096 PetscFunctionReturn(0); 4097 } 4098 4099 /*@ 4100 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 4101 CSR format for the local rows. 4102 4103 Collective 4104 4105 Input Parameters: 4106 + comm - MPI communicator 4107 . m - number of local rows (Cannot be PETSC_DECIDE) 4108 . n - This value should be the same as the local size used in creating the 4109 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4110 calculated if N is given) For square matrices n is almost always m. 4111 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4112 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4113 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4114 . j - column indices 4115 - a - matrix values 4116 4117 Output Parameter: 4118 . mat - the matrix 4119 4120 Level: intermediate 4121 4122 Notes: 4123 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4124 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4125 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 
4126 4127 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4128 4129 The format which is used for the sparse matrix input, is equivalent to a 4130 row-major ordering.. i.e for the following matrix, the input data expected is 4131 as shown 4132 4133 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays 4134 4135 $ 1 0 0 4136 $ 2 0 3 P0 4137 $ ------- 4138 $ 4 5 6 P1 4139 $ 4140 $ Process0 [P0]: rows_owned=[0,1] 4141 $ i = {0,1,3} [size = nrow+1 = 2+1] 4142 $ j = {0,0,2} [size = 3] 4143 $ v = {1,2,3} [size = 3] 4144 $ 4145 $ Process1 [P1]: rows_owned=[2] 4146 $ i = {0,3} [size = nrow+1 = 1+1] 4147 $ j = {0,1,2} [size = 3] 4148 $ v = {4,5,6} [size = 3] 4149 4150 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4151 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4152 @*/ 4153 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4154 { 4155 PetscErrorCode ierr; 4156 4157 PetscFunctionBegin; 4158 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4159 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4160 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4161 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4162 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4163 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4164 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4165 PetscFunctionReturn(0); 4166 } 4167 4168 /*@ 4169 MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard 4170 CSR format for the local rows. 
Only the numerical values are updated the other arrays must be identical 4171 4172 Collective 4173 4174 Input Parameters: 4175 + mat - the matrix 4176 . m - number of local rows (Cannot be PETSC_DECIDE) 4177 . n - This value should be the same as the local size used in creating the 4178 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4179 calculated if N is given) For square matrices n is almost always m. 4180 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4181 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4182 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4183 . J - column indices 4184 - v - matrix values 4185 4186 Level: intermediate 4187 4188 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4189 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4190 @*/ 4191 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4192 { 4193 PetscErrorCode ierr; 4194 PetscInt cstart,nnz,i,j; 4195 PetscInt *ld; 4196 PetscBool nooffprocentries; 4197 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4198 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data; 4199 PetscScalar *ad = Ad->a, *ao = Ao->a; 4200 const PetscInt *Adi = Ad->i; 4201 PetscInt ldi,Iii,md; 4202 4203 PetscFunctionBegin; 4204 if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4205 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4206 if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4207 if (n != mat->cmap->n) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4208 4209 cstart = mat->cmap->rstart; 4210 if (!Aij->ld) { 4211 /* count number of entries below block diagonal */ 4212 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 4213 Aij->ld = ld; 4214 for (i=0; i<m; i++) { 4215 nnz = Ii[i+1]- Ii[i]; 4216 j = 0; 4217 while (J[j] < cstart && j < nnz) {j++;} 4218 J += nnz; 4219 ld[i] = j; 4220 } 4221 } else { 4222 ld = Aij->ld; 4223 } 4224 4225 for (i=0; i<m; i++) { 4226 nnz = Ii[i+1]- Ii[i]; 4227 Iii = Ii[i]; 4228 ldi = ld[i]; 4229 md = Adi[i+1]-Adi[i]; 4230 ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr); 4231 ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr); 4232 ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr); 4233 ad += md; 4234 ao += nnz - md; 4235 } 4236 nooffprocentries = mat->nooffprocentries; 4237 mat->nooffprocentries = PETSC_TRUE; 4238 ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr); 4239 ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr); 4240 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 4241 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4242 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4243 mat->nooffprocentries = nooffprocentries; 4244 PetscFunctionReturn(0); 4245 } 4246 4247 /*@C 4248 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4249 (the default parallel PETSc format). For good matrix assembly performance 4250 the user should preallocate the matrix storage by setting the parameters 4251 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4252 performance can be increased by more than a factor of 50. 4253 4254 Collective 4255 4256 Input Parameters: 4257 + comm - MPI communicator 4258 . 
m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       it calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e 'm'.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
[MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2 etc., where
   m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to 0th partition, the next n1 columns belonging to the next
   partition etc., where n0,n1,n2,... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor, i.e. the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1]
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism
.vb
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
.ve

$     MatCreate(...,&A);
$     MatSetType(A,MATMPIAIJ);
$     MatSetSizes(A, m,n,M,N);
$     MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
-  -mat_inode_limit <limit> - Sets inode limit (max limit=5)

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and  o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and  o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and  o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence pre-allocation is perfect.
4414 4415 Level: intermediate 4416 4417 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4418 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4419 @*/ 4420 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4421 { 4422 PetscErrorCode ierr; 4423 PetscMPIInt size; 4424 4425 PetscFunctionBegin; 4426 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4427 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4428 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4429 if (size > 1) { 4430 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4431 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4432 } else { 4433 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4434 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4435 } 4436 PetscFunctionReturn(0); 4437 } 4438 4439 /*@C 4440 MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix 4441 4442 Not collective 4443 4444 Input Parameter: 4445 . A - The MPIAIJ matrix 4446 4447 Output Parameters: 4448 + Ad - The local diagonal block as a SeqAIJ matrix 4449 . Ao - The local off-diagonal block as a SeqAIJ matrix 4450 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4451 4452 Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns 4453 in Ad are in [0, Nc) where Nc is the number of local columns. The columns are Ao are in [0, Nco), where Nco is 4454 the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these 4455 local column numbers to global column numbers in the original matrix. 
4456 4457 Level: intermediate 4458 4459 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ 4460 @*/ 4461 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4462 { 4463 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4464 PetscBool flg; 4465 PetscErrorCode ierr; 4466 4467 PetscFunctionBegin; 4468 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4469 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4470 if (Ad) *Ad = a->A; 4471 if (Ao) *Ao = a->B; 4472 if (colmap) *colmap = a->garray; 4473 PetscFunctionReturn(0); 4474 } 4475 4476 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4477 { 4478 PetscErrorCode ierr; 4479 PetscInt m,N,i,rstart,nnz,Ii; 4480 PetscInt *indx; 4481 PetscScalar *values; 4482 4483 PetscFunctionBegin; 4484 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4485 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4486 PetscInt *dnz,*onz,sum,bs,cbs; 4487 4488 if (n == PETSC_DECIDE) { 4489 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4490 } 4491 /* Check sum(n) = N */ 4492 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 4493 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4494 4495 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 4496 rstart -= m; 4497 4498 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4499 for (i=0; i<m; i++) { 4500 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4501 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4502 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4503 } 4504 4505 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4506 ierr = 
MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4507 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4508 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4509 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4510 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4511 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4512 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4513 ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 4514 } 4515 4516 /* numeric phase */ 4517 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4518 for (i=0; i<m; i++) { 4519 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4520 Ii = i + rstart; 4521 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4522 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4523 } 4524 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4525 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4526 PetscFunctionReturn(0); 4527 } 4528 4529 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4530 { 4531 PetscErrorCode ierr; 4532 PetscMPIInt rank; 4533 PetscInt m,N,i,rstart,nnz; 4534 size_t len; 4535 const PetscInt *indx; 4536 PetscViewer out; 4537 char *name; 4538 Mat B; 4539 const PetscScalar *values; 4540 4541 PetscFunctionBegin; 4542 ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr); 4543 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 4544 /* Should this be the type of the diagonal block of A? 
*/ 4545 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4546 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4547 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4548 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4549 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4550 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 4551 for (i=0; i<m; i++) { 4552 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4553 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4554 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4555 } 4556 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4557 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4558 4559 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr); 4560 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4561 ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr); 4562 ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr); 4563 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4564 ierr = PetscFree(name);CHKERRQ(ierr); 4565 ierr = MatView(B,out);CHKERRQ(ierr); 4566 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4567 ierr = MatDestroy(&B);CHKERRQ(ierr); 4568 PetscFunctionReturn(0); 4569 } 4570 4571 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4572 { 4573 PetscErrorCode ierr; 4574 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4575 4576 PetscFunctionBegin; 4577 if (!merge) PetscFunctionReturn(0); 4578 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4579 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4580 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4581 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4582 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4583 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4584 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4585 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4586 ierr = 
PetscFree(merge->buf_rj);CHKERRQ(ierr); 4587 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4588 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4589 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4590 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4591 ierr = PetscFree(merge);CHKERRQ(ierr); 4592 PetscFunctionReturn(0); 4593 } 4594 4595 #include <../src/mat/utils/freespace.h> 4596 #include <petscbt.h> 4597 4598 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4599 { 4600 PetscErrorCode ierr; 4601 MPI_Comm comm; 4602 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4603 PetscMPIInt size,rank,taga,*len_s; 4604 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4605 PetscInt proc,m; 4606 PetscInt **buf_ri,**buf_rj; 4607 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4608 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4609 MPI_Request *s_waits,*r_waits; 4610 MPI_Status *status; 4611 MatScalar *aa=a->a; 4612 MatScalar **abuf_r,*ba_i; 4613 Mat_Merge_SeqsToMPI *merge; 4614 PetscContainer container; 4615 4616 PetscFunctionBegin; 4617 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4618 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4619 4620 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4621 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 4622 4623 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4624 if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4625 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4626 4627 bi = merge->bi; 4628 bj = merge->bj; 4629 buf_ri = merge->buf_ri; 4630 buf_rj = merge->buf_rj; 4631 4632 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4633 owners = merge->rowmap->range; 4634 len_s = merge->len_s; 4635 4636 /* send and recv matrix values */ 4637 /*-----------------------------*/ 4638 ierr = 
PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4639 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4640 4641 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4642 for (proc=0,k=0; proc<size; proc++) { 4643 if (!len_s[proc]) continue; 4644 i = owners[proc]; 4645 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRMPI(ierr); 4646 k++; 4647 } 4648 4649 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRMPI(ierr);} 4650 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRMPI(ierr);} 4651 ierr = PetscFree(status);CHKERRQ(ierr); 4652 4653 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4654 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4655 4656 /* insert mat values of mpimat */ 4657 /*----------------------------*/ 4658 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4659 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4660 4661 for (k=0; k<merge->nrecv; k++) { 4662 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4663 nrows = *(buf_ri_k[k]); 4664 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4665 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4666 } 4667 4668 /* set values of ba */ 4669 m = merge->rowmap->n; 4670 for (i=0; i<m; i++) { 4671 arow = owners[rank] + i; 4672 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4673 bnzi = bi[i+1] - bi[i]; 4674 ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr); 4675 4676 /* add local non-zero vals of this proc's seqmat into ba */ 4677 anzi = ai[arow+1] - ai[arow]; 4678 aj = a->j + ai[arow]; 4679 aa = a->a + ai[arow]; 4680 nextaj = 0; 4681 for (j=0; nextaj<anzi; j++) { 4682 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4683 ba_i[j] += aa[nextaj++]; 4684 } 4685 } 4686 4687 /* add received vals into ba */ 4688 for 
(k=0; k<merge->nrecv; k++) { /* k-th received message */ 4689 /* i-th row */ 4690 if (i == *nextrow[k]) { 4691 anzi = *(nextai[k]+1) - *nextai[k]; 4692 aj = buf_rj[k] + *(nextai[k]); 4693 aa = abuf_r[k] + *(nextai[k]); 4694 nextaj = 0; 4695 for (j=0; nextaj<anzi; j++) { 4696 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4697 ba_i[j] += aa[nextaj++]; 4698 } 4699 } 4700 nextrow[k]++; nextai[k]++; 4701 } 4702 } 4703 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4704 } 4705 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4706 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4707 4708 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4709 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4710 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4711 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4712 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4713 PetscFunctionReturn(0); 4714 } 4715 4716 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4717 { 4718 PetscErrorCode ierr; 4719 Mat B_mpi; 4720 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4721 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4722 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4723 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4724 PetscInt len,proc,*dnz,*onz,bs,cbs; 4725 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4726 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4727 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4728 MPI_Status *status; 4729 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4730 PetscBT lnkbt; 4731 Mat_Merge_SeqsToMPI *merge; 4732 PetscContainer container; 4733 4734 PetscFunctionBegin; 4735 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4736 4737 /* make sure it is a PETSc comm */ 4738 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4739 ierr = 
MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4740 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 4741 4742 ierr = PetscNew(&merge);CHKERRQ(ierr); 4743 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4744 4745 /* determine row ownership */ 4746 /*---------------------------------------------------------*/ 4747 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4748 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4749 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4750 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4751 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4752 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4753 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4754 4755 m = merge->rowmap->n; 4756 owners = merge->rowmap->range; 4757 4758 /* determine the number of messages to send, their lengths */ 4759 /*---------------------------------------------------------*/ 4760 len_s = merge->len_s; 4761 4762 len = 0; /* length of buf_si[] */ 4763 merge->nsend = 0; 4764 for (proc=0; proc<size; proc++) { 4765 len_si[proc] = 0; 4766 if (proc == rank) { 4767 len_s[proc] = 0; 4768 } else { 4769 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4770 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4771 } 4772 if (len_s[proc]) { 4773 merge->nsend++; 4774 nrows = 0; 4775 for (i=owners[proc]; i<owners[proc+1]; i++) { 4776 if (ai[i+1] > ai[i]) nrows++; 4777 } 4778 len_si[proc] = 2*(nrows+1); 4779 len += len_si[proc]; 4780 } 4781 } 4782 4783 /* determine the number and length of messages to receive for ij-structure */ 4784 /*-------------------------------------------------------------------------*/ 4785 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4786 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4787 4788 /* post the Irecv of j-structure */ 4789 
/*-------------------------------*/ 4790 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4791 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4792 4793 /* post the Isend of j-structure */ 4794 /*--------------------------------*/ 4795 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4796 4797 for (proc=0, k=0; proc<size; proc++) { 4798 if (!len_s[proc]) continue; 4799 i = owners[proc]; 4800 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRMPI(ierr); 4801 k++; 4802 } 4803 4804 /* receives and sends of j-structure are complete */ 4805 /*------------------------------------------------*/ 4806 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRMPI(ierr);} 4807 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRMPI(ierr);} 4808 4809 /* send and recv i-structure */ 4810 /*---------------------------*/ 4811 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4812 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4813 4814 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4815 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4816 for (proc=0,k=0; proc<size; proc++) { 4817 if (!len_s[proc]) continue; 4818 /* form outgoing message for i-structure: 4819 buf_si[0]: nrows to be sent 4820 [1:nrows]: row index (global) 4821 [nrows+1:2*nrows+1]: i-structure index 4822 */ 4823 /*-------------------------------------------*/ 4824 nrows = len_si[proc]/2 - 1; 4825 buf_si_i = buf_si + nrows+1; 4826 buf_si[0] = nrows; 4827 buf_si_i[0] = 0; 4828 nrows = 0; 4829 for (i=owners[proc]; i<owners[proc+1]; i++) { 4830 anzi = ai[i+1] - ai[i]; 4831 if (anzi) { 4832 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4833 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4834 nrows++; 4835 } 4836 } 4837 ierr = 
MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRMPI(ierr); 4838 k++; 4839 buf_si += len_si[proc]; 4840 } 4841 4842 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRMPI(ierr);} 4843 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRMPI(ierr);} 4844 4845 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4846 for (i=0; i<merge->nrecv; i++) { 4847 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4848 } 4849 4850 ierr = PetscFree(len_si);CHKERRQ(ierr); 4851 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4852 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4853 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4854 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4855 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4856 ierr = PetscFree(status);CHKERRQ(ierr); 4857 4858 /* compute a local seq matrix in each processor */ 4859 /*----------------------------------------------*/ 4860 /* allocate bi array and free space for accumulating nonzero column info */ 4861 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4862 bi[0] = 0; 4863 4864 /* create and initialize a linked list */ 4865 nlnk = N+1; 4866 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4867 4868 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4869 len = ai[owners[rank+1]] - ai[owners[rank]]; 4870 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4871 4872 current_space = free_space; 4873 4874 /* determine symbolic info for each local row */ 4875 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4876 4877 for (k=0; k<merge->nrecv; k++) { 4878 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4879 nrows = *buf_ri_k[k]; 4880 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4881 nextai[k] = buf_ri_k[k] + (nrows + 
1); /* points to the next i-structure of k-th recved i-structure */ 4882 } 4883 4884 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4885 len = 0; 4886 for (i=0; i<m; i++) { 4887 bnzi = 0; 4888 /* add local non-zero cols of this proc's seqmat into lnk */ 4889 arow = owners[rank] + i; 4890 anzi = ai[arow+1] - ai[arow]; 4891 aj = a->j + ai[arow]; 4892 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4893 bnzi += nlnk; 4894 /* add received col data into lnk */ 4895 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4896 if (i == *nextrow[k]) { /* i-th row */ 4897 anzi = *(nextai[k]+1) - *nextai[k]; 4898 aj = buf_rj[k] + *nextai[k]; 4899 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4900 bnzi += nlnk; 4901 nextrow[k]++; nextai[k]++; 4902 } 4903 } 4904 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4905 4906 /* if free space is not available, make more free space */ 4907 if (current_space->local_remaining<bnzi) { 4908 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4909 nspacedouble++; 4910 } 4911 /* copy data into free space, then initialize lnk */ 4912 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4913 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4914 4915 current_space->array += bnzi; 4916 current_space->local_used += bnzi; 4917 current_space->local_remaining -= bnzi; 4918 4919 bi[i+1] = bi[i] + bnzi; 4920 } 4921 4922 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4923 4924 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4925 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4926 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4927 4928 /* create symbolic parallel matrix B_mpi */ 4929 /*---------------------------------------*/ 4930 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4931 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4932 if 
(n==PETSC_DECIDE) { 4933 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4934 } else { 4935 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4936 } 4937 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4938 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4939 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4940 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4941 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4942 4943 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4944 B_mpi->assembled = PETSC_FALSE; 4945 merge->bi = bi; 4946 merge->bj = bj; 4947 merge->buf_ri = buf_ri; 4948 merge->buf_rj = buf_rj; 4949 merge->coi = NULL; 4950 merge->coj = NULL; 4951 merge->owners_co = NULL; 4952 4953 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4954 4955 /* attach the supporting struct to B_mpi for reuse */ 4956 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4957 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4958 ierr = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr); 4959 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4960 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4961 *mpimat = B_mpi; 4962 4963 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4964 PetscFunctionReturn(0); 4965 } 4966 4967 /*@C 4968 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4969 matrices from each processor 4970 4971 Collective 4972 4973 Input Parameters: 4974 + comm - the communicators the parallel matrix will live on 4975 . seqmat - the input sequential matrices 4976 . m - number of local rows (or PETSC_DECIDE) 4977 . n - number of local columns (or PETSC_DECIDE) 4978 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4979 4980 Output Parameter: 4981 . 
mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscMPIInt    nranks;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&nranks);CHKERRMPI(ierr);

  /* The MAT_Seqstompi event brackets the serial and the parallel path alike */
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (nranks == 1) {
    /* Single rank: the result is a duplicate of seqmat (initial call) or a value copy into *mpimat (any other scall) */
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
    } else {
      ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
  } else {
    /* The symbolic phase runs only on the initial call; the numeric phase runs on every call */
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
    }
    ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize()

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.
A_loc - the local sequential matrix generated 5029 5030 Level: developer 5031 5032 Notes: 5033 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 5034 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 5035 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 5036 modify the values of the returned A_loc. 5037 5038 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge() 5039 @*/ 5040 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5041 { 5042 PetscErrorCode ierr; 5043 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5044 Mat_SeqAIJ *mat,*a,*b; 5045 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5046 const PetscScalar *aa,*ba,*aav,*bav; 5047 PetscScalar *ca,*cam; 5048 PetscMPIInt size; 5049 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5050 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5051 PetscBool match; 5052 5053 PetscFunctionBegin; 5054 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 5055 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5056 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 5057 if (size == 1) { 5058 if (scall == MAT_INITIAL_MATRIX) { 5059 ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr); 5060 *A_loc = mpimat->A; 5061 } else if (scall == MAT_REUSE_MATRIX) { 5062 ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5063 } 5064 PetscFunctionReturn(0); 5065 } 5066 5067 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5068 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5069 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5070 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5071 ierr = MatSeqAIJGetArrayRead(mpimat->A,&aav);CHKERRQ(ierr); 5072 
ierr = MatSeqAIJGetArrayRead(mpimat->B,&bav);CHKERRQ(ierr); 5073 aa = aav; 5074 ba = bav; 5075 if (scall == MAT_INITIAL_MATRIX) { 5076 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5077 ci[0] = 0; 5078 for (i=0; i<am; i++) { 5079 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5080 } 5081 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5082 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5083 k = 0; 5084 for (i=0; i<am; i++) { 5085 ncols_o = bi[i+1] - bi[i]; 5086 ncols_d = ai[i+1] - ai[i]; 5087 /* off-diagonal portion of A */ 5088 for (jo=0; jo<ncols_o; jo++) { 5089 col = cmap[*bj]; 5090 if (col >= cstart) break; 5091 cj[k] = col; bj++; 5092 ca[k++] = *ba++; 5093 } 5094 /* diagonal portion of A */ 5095 for (j=0; j<ncols_d; j++) { 5096 cj[k] = cstart + *aj++; 5097 ca[k++] = *aa++; 5098 } 5099 /* off-diagonal portion of A */ 5100 for (j=jo; j<ncols_o; j++) { 5101 cj[k] = cmap[*bj++]; 5102 ca[k++] = *ba++; 5103 } 5104 } 5105 /* put together the new matrix */ 5106 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5107 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5108 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5109 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5110 mat->free_a = PETSC_TRUE; 5111 mat->free_ij = PETSC_TRUE; 5112 mat->nonew = 0; 5113 } else if (scall == MAT_REUSE_MATRIX) { 5114 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5115 #if defined(PETSC_USE_DEVICE) 5116 (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU; 5117 #endif 5118 ci = mat->i; cj = mat->j; cam = mat->a; 5119 for (i=0; i<am; i++) { 5120 /* off-diagonal portion of A */ 5121 ncols_o = bi[i+1] - bi[i]; 5122 for (jo=0; jo<ncols_o; jo++) { 5123 col = cmap[*bj]; 5124 if (col >= cstart) break; 5125 *cam++ = *ba++; bj++; 5126 } 5127 /* diagonal portion of A */ 5128 ncols_d = ai[i+1] - ai[i]; 5129 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5130 /* off-diagonal portion of A */ 5131 for (j=jo; j<ncols_o; j++) { 5132 *cam++ = *ba++; bj++; 5133 } 5134 } 5135 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5136 ierr = MatSeqAIJRestoreArrayRead(mpimat->A,&aav);CHKERRQ(ierr); 5137 ierr = MatSeqAIJRestoreArrayRead(mpimat->B,&bav);CHKERRQ(ierr); 5138 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5139 PetscFunctionReturn(0); 5140 } 5141 5142 /*@ 5143 MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5144 mlocal rows and n columns. 
Where n is the sum of the number of columns of the diagonal and offdiagonal part 5145 5146 Not Collective 5147 5148 Input Parameters: 5149 + A - the matrix 5150 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5151 5152 Output Parameter: 5153 + glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL) 5154 - A_loc - the local sequential matrix generated 5155 5156 Level: developer 5157 5158 Notes: 5159 This is different from MatMPIAIJGetLocalMat() since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering) 5160 5161 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed() 5162 5163 @*/ 5164 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc) 5165 { 5166 PetscErrorCode ierr; 5167 Mat Ao,Ad; 5168 const PetscInt *cmap; 5169 PetscMPIInt size; 5170 PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*); 5171 5172 PetscFunctionBegin; 5173 ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap);CHKERRQ(ierr); 5174 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 5175 if (size == 1) { 5176 if (scall == MAT_INITIAL_MATRIX) { 5177 ierr = PetscObjectReference((PetscObject)Ad);CHKERRQ(ierr); 5178 *A_loc = Ad; 5179 } else if (scall == MAT_REUSE_MATRIX) { 5180 ierr = MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5181 } 5182 if (glob) { ierr = ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob);CHKERRQ(ierr); } 5183 PetscFunctionReturn(0); 5184 } 5185 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f);CHKERRQ(ierr); 5186 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5187 if (f) { 5188 ierr = (*f)(A,scall,glob,A_loc);CHKERRQ(ierr); 5189 } else { 5190 Mat_SeqAIJ *a = (Mat_SeqAIJ*)Ad->data; 5191 Mat_SeqAIJ *b = (Mat_SeqAIJ*)Ao->data; 5192 Mat_SeqAIJ *c; 
5193 PetscInt *ai = a->i, *aj = a->j; 5194 PetscInt *bi = b->i, *bj = b->j; 5195 PetscInt *ci,*cj; 5196 const PetscScalar *aa,*ba; 5197 PetscScalar *ca; 5198 PetscInt i,j,am,dn,on; 5199 5200 ierr = MatGetLocalSize(Ad,&am,&dn);CHKERRQ(ierr); 5201 ierr = MatGetLocalSize(Ao,NULL,&on);CHKERRQ(ierr); 5202 ierr = MatSeqAIJGetArrayRead(Ad,&aa);CHKERRQ(ierr); 5203 ierr = MatSeqAIJGetArrayRead(Ao,&ba);CHKERRQ(ierr); 5204 if (scall == MAT_INITIAL_MATRIX) { 5205 PetscInt k; 5206 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5207 ierr = PetscMalloc1(ai[am]+bi[am],&cj);CHKERRQ(ierr); 5208 ierr = PetscMalloc1(ai[am]+bi[am],&ca);CHKERRQ(ierr); 5209 ci[0] = 0; 5210 for (i=0,k=0; i<am; i++) { 5211 const PetscInt ncols_o = bi[i+1] - bi[i]; 5212 const PetscInt ncols_d = ai[i+1] - ai[i]; 5213 ci[i+1] = ci[i] + ncols_o + ncols_d; 5214 /* diagonal portion of A */ 5215 for (j=0; j<ncols_d; j++,k++) { 5216 cj[k] = *aj++; 5217 ca[k] = *aa++; 5218 } 5219 /* off-diagonal portion of A */ 5220 for (j=0; j<ncols_o; j++,k++) { 5221 cj[k] = dn + *bj++; 5222 ca[k] = *ba++; 5223 } 5224 } 5225 /* put together the new matrix */ 5226 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc);CHKERRQ(ierr); 5227 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5228 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5229 c = (Mat_SeqAIJ*)(*A_loc)->data; 5230 c->free_a = PETSC_TRUE; 5231 c->free_ij = PETSC_TRUE; 5232 c->nonew = 0; 5233 ierr = MatSetType(*A_loc,((PetscObject)Ad)->type_name);CHKERRQ(ierr); 5234 } else if (scall == MAT_REUSE_MATRIX) { 5235 #if defined(PETSC_HAVE_DEVICE) 5236 (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU; 5237 #endif 5238 c = (Mat_SeqAIJ*)(*A_loc)->data; 5239 ca = c->a; 5240 for (i=0; i<am; i++) { 5241 const PetscInt ncols_d = ai[i+1] - ai[i]; 5242 const PetscInt ncols_o = bi[i+1] - bi[i]; 5243 /* diagonal portion of A */ 5244 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 5245 /* off-diagonal portion of A */ 5246 for (j=0; j<ncols_o; j++) *ca++ = *ba++; 5247 } 5248 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5249 ierr = MatSeqAIJRestoreArrayRead(Ad,&aa);CHKERRQ(ierr); 5250 ierr = MatSeqAIJRestoreArrayRead(Ao,&aa);CHKERRQ(ierr); 5251 if (glob) { 5252 PetscInt cst, *gidx; 5253 5254 ierr = MatGetOwnershipRangeColumn(A,&cst,NULL);CHKERRQ(ierr); 5255 ierr = PetscMalloc1(dn+on,&gidx);CHKERRQ(ierr); 5256 for (i=0; i<dn; i++) gidx[i] = cst + i; 5257 for (i=0; i<on; i++) gidx[i+dn] = cmap[i]; 5258 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob);CHKERRQ(ierr); 5259 } 5260 } 5261 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5262 PetscFunctionReturn(0); 5263 } 5264 5265 /*@C 5266 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5267 5268 Not Collective 5269 5270 Input Parameters: 5271 + A - the matrix 5272 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5273 - row, col - index sets of rows and columns to extract (or NULL) 5274 5275 Output Parameter: 5276 . 
A_loc - the local sequential matrix generated 5277 5278 Level: developer 5279 5280 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5281 5282 @*/ 5283 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5284 { 5285 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5286 PetscErrorCode ierr; 5287 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5288 IS isrowa,iscola; 5289 Mat *aloc; 5290 PetscBool match; 5291 5292 PetscFunctionBegin; 5293 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5294 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5295 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5296 if (!row) { 5297 start = A->rmap->rstart; end = A->rmap->rend; 5298 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5299 } else { 5300 isrowa = *row; 5301 } 5302 if (!col) { 5303 start = A->cmap->rstart; 5304 cmap = a->garray; 5305 nzA = a->A->cmap->n; 5306 nzB = a->B->cmap->n; 5307 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5308 ncols = 0; 5309 for (i=0; i<nzB; i++) { 5310 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5311 else break; 5312 } 5313 imark = i; 5314 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5315 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5316 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5317 } else { 5318 iscola = *col; 5319 } 5320 if (scall != MAT_INITIAL_MATRIX) { 5321 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5322 aloc[0] = *A_loc; 5323 } 5324 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5325 if (!col) { /* attach global id of condensed columns */ 5326 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5327 } 5328 *A_loc = aloc[0]; 5329 ierr = PetscFree(aloc);CHKERRQ(ierr); 5330 if (!row) { 5331 ierr = 
ISDestroy(&isrowa);CHKERRQ(ierr); 5332 } 5333 if (!col) { 5334 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5335 } 5336 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5337 PetscFunctionReturn(0); 5338 } 5339 5340 /* 5341 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5342 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5343 * on a global size. 5344 * */ 5345 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5346 { 5347 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5348 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5349 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5350 PetscMPIInt owner; 5351 PetscSFNode *iremote,*oiremote; 5352 const PetscInt *lrowindices; 5353 PetscErrorCode ierr; 5354 PetscSF sf,osf; 5355 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5356 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5357 MPI_Comm comm; 5358 ISLocalToGlobalMapping mapping; 5359 5360 PetscFunctionBegin; 5361 ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr); 5362 /* plocalsize is the number of roots 5363 * nrows is the number of leaves 5364 * */ 5365 ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr); 5366 ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr); 5367 ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr); 5368 ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr); 5369 for (i=0;i<nrows;i++) { 5370 /* Find a remote index and an owner for a row 5371 * The row could be local or remote 5372 * */ 5373 owner = 0; 5374 lidx = 0; 5375 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr); 5376 iremote[i].index = lidx; 5377 iremote[i].rank = owner; 5378 } 5379 /* Create SF to communicate how many nonzero columns for each row */ 5380 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5381 /* SF will figure out the number of 
nonzero colunms for each row, and their 5382 * offsets 5383 * */ 5384 ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5385 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5386 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5387 5388 ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr); 5389 ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr); 5390 ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr); 5391 roffsets[0] = 0; 5392 roffsets[1] = 0; 5393 for (i=0;i<plocalsize;i++) { 5394 /* diag */ 5395 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5396 /* off diag */ 5397 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5398 /* compute offsets so that we relative location for each row */ 5399 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5400 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5401 } 5402 ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr); 5403 ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr); 5404 /* 'r' means root, and 'l' means leaf */ 5405 ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr); 5406 ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr); 5407 ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr); 5408 ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr); 5409 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5410 ierr = PetscFree(roffsets);CHKERRQ(ierr); 5411 ierr = PetscFree(nrcols);CHKERRQ(ierr); 5412 dntotalcols = 0; 5413 ontotalcols = 0; 5414 ncol = 0; 5415 for (i=0;i<nrows;i++) { 5416 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5417 ncol = PetscMax(pnnz[i],ncol); 5418 /* diag */ 5419 dntotalcols += nlcols[i*2+0]; 5420 /* off diag */ 5421 ontotalcols += nlcols[i*2+1]; 5422 } 5423 /* We do not need to figure the right number of columns 5424 * since all the calculations will be done by going through the raw data 5425 * */ 5426 ierr = 
MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr); 5427 ierr = MatSetUp(*P_oth);CHKERRQ(ierr); 5428 ierr = PetscFree(pnnz);CHKERRQ(ierr); 5429 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5430 /* diag */ 5431 ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr); 5432 /* off diag */ 5433 ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr); 5434 /* diag */ 5435 ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr); 5436 /* off diag */ 5437 ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr); 5438 dntotalcols = 0; 5439 ontotalcols = 0; 5440 ntotalcols = 0; 5441 for (i=0;i<nrows;i++) { 5442 owner = 0; 5443 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr); 5444 /* Set iremote for diag matrix */ 5445 for (j=0;j<nlcols[i*2+0];j++) { 5446 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5447 iremote[dntotalcols].rank = owner; 5448 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5449 ilocal[dntotalcols++] = ntotalcols++; 5450 } 5451 /* off diag */ 5452 for (j=0;j<nlcols[i*2+1];j++) { 5453 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5454 oiremote[ontotalcols].rank = owner; 5455 oilocal[ontotalcols++] = ntotalcols++; 5456 } 5457 } 5458 ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr); 5459 ierr = PetscFree(loffsets);CHKERRQ(ierr); 5460 ierr = PetscFree(nlcols);CHKERRQ(ierr); 5461 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5462 /* P serves as roots and P_oth is leaves 5463 * Diag matrix 5464 * */ 5465 ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5466 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5467 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5468 5469 ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr); 5470 /* Off diag */ 5471 ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5472 ierr = 
PetscSFSetFromOptions(osf);CHKERRQ(ierr); 5473 ierr = PetscSFSetUp(osf);CHKERRQ(ierr); 5474 /* We operate on the matrix internal data for saving memory */ 5475 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5476 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5477 ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr); 5478 /* Convert to global indices for diag matrix */ 5479 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5480 ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5481 /* We want P_oth store global indices */ 5482 ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr); 5483 /* Use memory scalable approach */ 5484 ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr); 5485 ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr); 5486 ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5487 ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5488 /* Convert back to local indices */ 5489 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5490 ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5491 nout = 0; 5492 ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr); 5493 if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout); 5494 ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr); 5495 /* Exchange values */ 5496 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5497 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5498 /* Stop PETSc from shrinking memory */ 5499 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5500 ierr = 
MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5501 ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5502 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5503 ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr); 5504 ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr); 5505 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5506 ierr = PetscSFDestroy(&osf);CHKERRQ(ierr); 5507 PetscFunctionReturn(0); 5508 } 5509 5510 /* 5511 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5512 * This supports MPIAIJ and MAIJ 5513 * */ 5514 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5515 { 5516 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5517 Mat_SeqAIJ *p_oth; 5518 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data; 5519 IS rows,map; 5520 PetscHMapI hamp; 5521 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5522 MPI_Comm comm; 5523 PetscSF sf,osf; 5524 PetscBool has; 5525 PetscErrorCode ierr; 5526 5527 PetscFunctionBegin; 5528 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5529 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5530 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5531 * and then create a submatrix (that often is an overlapping matrix) 5532 * */ 5533 if (reuse == MAT_INITIAL_MATRIX) { 5534 /* Use a hash table to figure out unique keys */ 5535 ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr); 5536 ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr); 5537 ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr); 5538 count = 0; 5539 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5540 for (i=0;i<a->B->cmap->n;i++) { 5541 key = a->garray[i]/dof; 5542 ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr); 5543 if (!has) { 5544 
mapping[i] = count; 5545 ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr); 5546 } else { 5547 /* Current 'i' has the same value the previous step */ 5548 mapping[i] = count-1; 5549 } 5550 } 5551 ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr); 5552 ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr); 5553 if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count); 5554 ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr); 5555 off = 0; 5556 ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr); 5557 ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr); 5558 ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr); 5559 ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr); 5560 /* In case, the matrix was already created but users want to recreate the matrix */ 5561 ierr = MatDestroy(P_oth);CHKERRQ(ierr); 5562 ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr); 5563 ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr); 5564 ierr = ISDestroy(&map);CHKERRQ(ierr); 5565 ierr = ISDestroy(&rows);CHKERRQ(ierr); 5566 } else if (reuse == MAT_REUSE_MATRIX) { 5567 /* If matrix was already created, we simply update values using SF objects 5568 * that as attached to the matrix ealier. 
5569 * */ 5570 ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5571 ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5572 if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet"); 5573 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5574 /* Update values in place */ 5575 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5576 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5577 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5578 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5579 } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type"); 5580 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5581 PetscFunctionReturn(0); 5582 } 5583 5584 /*@C 5585 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5586 5587 Collective on Mat 5588 5589 Input Parameters: 5590 + A,B - the matrices in mpiaij format 5591 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5592 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5593 5594 Output Parameter: 5595 + rowb, colb - index sets of rows and columns of B to extract 5596 - B_seq - the sequential matrix generated 5597 5598 Level: developer 5599 5600 @*/ 5601 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5602 { 5603 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5604 PetscErrorCode ierr; 5605 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5606 IS isrowb,iscolb; 5607 Mat *bseq=NULL; 5608 5609 PetscFunctionBegin; 5610 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5611 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5612 } 5613 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5614 5615 if (scall == MAT_INITIAL_MATRIX) { 5616 start = A->cmap->rstart; 5617 cmap = a->garray; 5618 nzA = a->A->cmap->n; 5619 nzB = a->B->cmap->n; 5620 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5621 ncols = 0; 5622 for (i=0; i<nzB; i++) { /* row < local row index */ 5623 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5624 else break; 5625 } 5626 imark = i; 5627 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5628 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5629 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5630 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5631 } else { 5632 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5633 isrowb = *rowb; iscolb = *colb; 5634 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5635 bseq[0] = *B_seq; 5636 } 5637 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5638 *B_seq = 
bseq[0]; 5639 ierr = PetscFree(bseq);CHKERRQ(ierr); 5640 if (!rowb) { 5641 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5642 } else { 5643 *rowb = isrowb; 5644 } 5645 if (!colb) { 5646 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5647 } else { 5648 *colb = iscolb; 5649 } 5650 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5651 PetscFunctionReturn(0); 5652 } 5653 5654 /* 5655 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5656 of the OFF-DIAGONAL portion of local A 5657 5658 Collective on Mat 5659 5660 Input Parameters: 5661 + A,B - the matrices in mpiaij format 5662 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5663 5664 Output Parameter: 5665 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5666 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5667 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5668 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5669 5670 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5671 for this matrix. This is not desirable.. 
5672 5673 Level: developer 5674 5675 */ 5676 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5677 { 5678 PetscErrorCode ierr; 5679 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5680 Mat_SeqAIJ *b_oth; 5681 VecScatter ctx; 5682 MPI_Comm comm; 5683 const PetscMPIInt *rprocs,*sprocs; 5684 const PetscInt *srow,*rstarts,*sstarts; 5685 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5686 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len; 5687 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5688 MPI_Request *reqs = NULL,*rwaits = NULL,*swaits = NULL; 5689 PetscMPIInt size,tag,rank,nreqs; 5690 5691 PetscFunctionBegin; 5692 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5693 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 5694 5695 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5696 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5697 } 5698 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5699 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 5700 5701 if (size == 1) { 5702 startsj_s = NULL; 5703 bufa_ptr = NULL; 5704 *B_oth = NULL; 5705 PetscFunctionReturn(0); 5706 } 5707 5708 ctx = a->Mvctx; 5709 tag = ((PetscObject)ctx)->tag; 5710 5711 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5712 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5713 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5714 ierr = PetscMPIIntCast(nsends+nrecvs,&nreqs);CHKERRQ(ierr); 5715 ierr = 
PetscMalloc1(nreqs,&reqs);CHKERRQ(ierr); 5716 rwaits = reqs; 5717 swaits = reqs + nrecvs; 5718 5719 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5720 if (scall == MAT_INITIAL_MATRIX) { 5721 /* i-array */ 5722 /*---------*/ 5723 /* post receives */ 5724 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */ 5725 for (i=0; i<nrecvs; i++) { 5726 rowlen = rvalues + rstarts[i]*rbs; 5727 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5728 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5729 } 5730 5731 /* pack the outgoing message */ 5732 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5733 5734 sstartsj[0] = 0; 5735 rstartsj[0] = 0; 5736 len = 0; /* total length of j or a array to be sent */ 5737 if (nsends) { 5738 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5739 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5740 } 5741 for (i=0; i<nsends; i++) { 5742 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5743 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5744 for (j=0; j<nrows; j++) { 5745 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5746 for (l=0; l<sbs; l++) { 5747 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5748 5749 rowlen[j*sbs+l] = ncols; 5750 5751 len += ncols; 5752 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5753 } 5754 k++; 5755 } 5756 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5757 5758 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5759 } 5760 /* recvs and sends of i-array are completed */ 5761 if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5762 ierr = PetscFree(svalues);CHKERRQ(ierr); 5763 5764 /* allocate buffers 
for sending j and a arrays */ 5765 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5766 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5767 5768 /* create i-array of B_oth */ 5769 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5770 5771 b_othi[0] = 0; 5772 len = 0; /* total length of j or a array to be received */ 5773 k = 0; 5774 for (i=0; i<nrecvs; i++) { 5775 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5776 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5777 for (j=0; j<nrows; j++) { 5778 b_othi[k+1] = b_othi[k] + rowlen[j]; 5779 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5780 k++; 5781 } 5782 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5783 } 5784 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5785 5786 /* allocate space for j and a arrrays of B_oth */ 5787 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5788 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5789 5790 /* j-array */ 5791 /*---------*/ 5792 /* post receives of j-array */ 5793 for (i=0; i<nrecvs; i++) { 5794 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5795 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5796 } 5797 5798 /* pack the outgoing message j-array */ 5799 if (nsends) k = sstarts[0]; 5800 for (i=0; i<nsends; i++) { 5801 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5802 bufJ = bufj+sstartsj[i]; 5803 for (j=0; j<nrows; j++) { 5804 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5805 for (ll=0; ll<sbs; ll++) { 5806 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5807 for (l=0; l<ncols; l++) { 5808 *bufJ++ = cols[l]; 5809 } 5810 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5811 } 5812 } 5813 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5814 } 5815 5816 /* recvs and sends of j-array are 
completed */ 5817 if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5818 } else if (scall == MAT_REUSE_MATRIX) { 5819 sstartsj = *startsj_s; 5820 rstartsj = *startsj_r; 5821 bufa = *bufa_ptr; 5822 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5823 b_otha = b_oth->a; 5824 #if defined(PETSC_HAVE_DEVICE) 5825 (*B_oth)->offloadmask = PETSC_OFFLOAD_CPU; 5826 #endif 5827 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5828 5829 /* a-array */ 5830 /*---------*/ 5831 /* post receives of a-array */ 5832 for (i=0; i<nrecvs; i++) { 5833 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5834 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5835 } 5836 5837 /* pack the outgoing message a-array */ 5838 if (nsends) k = sstarts[0]; 5839 for (i=0; i<nsends; i++) { 5840 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5841 bufA = bufa+sstartsj[i]; 5842 for (j=0; j<nrows; j++) { 5843 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5844 for (ll=0; ll<sbs; ll++) { 5845 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5846 for (l=0; l<ncols; l++) { 5847 *bufA++ = vals[l]; 5848 } 5849 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5850 } 5851 } 5852 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5853 } 5854 /* recvs and sends of a-array are completed */ 5855 if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5856 ierr = PetscFree(reqs);CHKERRQ(ierr); 5857 5858 if (scall == MAT_INITIAL_MATRIX) { 5859 /* put together the new matrix */ 5860 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5861 5862 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. 
*/ 5863 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5864 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5865 b_oth->free_a = PETSC_TRUE; 5866 b_oth->free_ij = PETSC_TRUE; 5867 b_oth->nonew = 0; 5868 5869 ierr = PetscFree(bufj);CHKERRQ(ierr); 5870 if (!startsj_s || !bufa_ptr) { 5871 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5872 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5873 } else { 5874 *startsj_s = sstartsj; 5875 *startsj_r = rstartsj; 5876 *bufa_ptr = bufa; 5877 } 5878 } 5879 5880 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5881 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 5882 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5883 PetscFunctionReturn(0); 5884 } 5885 5886 /*@C 5887 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5888 5889 Not Collective 5890 5891 Input Parameters: 5892 . A - The matrix in mpiaij format 5893 5894 Output Parameter: 5895 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5896 . 
colmap - A map from global column index to local index into lvec 5897 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5898 5899 Level: developer 5900 5901 @*/ 5902 #if defined(PETSC_USE_CTABLE) 5903 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5904 #else 5905 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5906 #endif 5907 { 5908 Mat_MPIAIJ *a; 5909 5910 PetscFunctionBegin; 5911 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5912 PetscValidPointer(lvec, 2); 5913 PetscValidPointer(colmap, 3); 5914 PetscValidPointer(multScatter, 4); 5915 a = (Mat_MPIAIJ*) A->data; 5916 if (lvec) *lvec = a->lvec; 5917 if (colmap) *colmap = a->colmap; 5918 if (multScatter) *multScatter = a->Mvctx; 5919 PetscFunctionReturn(0); 5920 } 5921 5922 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5923 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5924 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5925 #if defined(PETSC_HAVE_MKL_SPARSE) 5926 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5927 #endif 5928 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*); 5929 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5930 #if defined(PETSC_HAVE_ELEMENTAL) 5931 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5932 #endif 5933 #if defined(PETSC_HAVE_SCALAPACK) 5934 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*); 5935 #endif 5936 #if defined(PETSC_HAVE_HYPRE) 5937 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5938 #endif 5939 #if defined(PETSC_HAVE_CUDA) 5940 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*); 5941 
#endif 5942 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 5943 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*); 5944 #endif 5945 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5946 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5947 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 5948 5949 /* 5950 Computes (B'*A')' since computing B*A directly is untenable 5951 5952 n p p 5953 [ ] [ ] [ ] 5954 m [ A ] * n [ B ] = m [ C ] 5955 [ ] [ ] [ ] 5956 5957 */ 5958 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5959 { 5960 PetscErrorCode ierr; 5961 Mat At,Bt,Ct; 5962 5963 PetscFunctionBegin; 5964 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5965 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5966 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr); 5967 ierr = MatDestroy(&At);CHKERRQ(ierr); 5968 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5969 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5970 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5971 PetscFunctionReturn(0); 5972 } 5973 5974 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C) 5975 { 5976 PetscErrorCode ierr; 5977 PetscBool cisdense; 5978 5979 PetscFunctionBegin; 5980 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5981 ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr); 5982 ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr); 5983 ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr); 5984 if (!cisdense) { 5985 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 5986 } 5987 ierr = MatSetUp(C);CHKERRQ(ierr); 5988 5989 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5990 PetscFunctionReturn(0); 5991 } 
5992 5993 /* ----------------------------------------------------------------*/ 5994 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 5995 { 5996 Mat_Product *product = C->product; 5997 Mat A = product->A,B=product->B; 5998 5999 PetscFunctionBegin; 6000 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) 6001 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 6002 6003 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6004 C->ops->productsymbolic = MatProductSymbolic_AB; 6005 PetscFunctionReturn(0); 6006 } 6007 6008 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6009 { 6010 PetscErrorCode ierr; 6011 Mat_Product *product = C->product; 6012 6013 PetscFunctionBegin; 6014 if (product->type == MATPRODUCT_AB) { 6015 ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr); 6016 } 6017 PetscFunctionReturn(0); 6018 } 6019 /* ----------------------------------------------------------------*/ 6020 6021 /*MC 6022 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6023 6024 Options Database Keys: 6025 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 6026 6027 Level: beginner 6028 6029 Notes: 6030 MatSetValues() may be called for this matrix type with a NULL argument for the numerical values, 6031 in this case the values associated with the rows and columns one passes in are set to zero 6032 in the matrix 6033 6034 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. 
In this no 6035 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 6036 6037 .seealso: MatCreateAIJ() 6038 M*/ 6039 6040 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6041 { 6042 Mat_MPIAIJ *b; 6043 PetscErrorCode ierr; 6044 PetscMPIInt size; 6045 6046 PetscFunctionBegin; 6047 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr); 6048 6049 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 6050 B->data = (void*)b; 6051 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 6052 B->assembled = PETSC_FALSE; 6053 B->insertmode = NOT_SET_VALUES; 6054 b->size = size; 6055 6056 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr); 6057 6058 /* build cache for off array entries formed */ 6059 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 6060 6061 b->donotstash = PETSC_FALSE; 6062 b->colmap = NULL; 6063 b->garray = NULL; 6064 b->roworiented = PETSC_TRUE; 6065 6066 /* stuff used for matrix vector multiply */ 6067 b->lvec = NULL; 6068 b->Mvctx = NULL; 6069 6070 /* stuff for MatGetRow() */ 6071 b->rowindices = NULL; 6072 b->rowvalues = NULL; 6073 b->getrowactive = PETSC_FALSE; 6074 6075 /* flexible pointer used in CUSPARSE classes */ 6076 b->spptr = NULL; 6077 6078 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 6079 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 6080 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 6081 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 6082 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 6083 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 6084 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 6085 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 6086 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 6087 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr); 6088 #if defined(PETSC_HAVE_CUDA) 6089 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE);CHKERRQ(ierr); 6090 #endif 6091 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6092 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos);CHKERRQ(ierr); 6093 #endif 6094 #if defined(PETSC_HAVE_MKL_SPARSE) 6095 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 6096 #endif 6097 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 6098 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr); 6099 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 6100 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense);CHKERRQ(ierr); 6101 #if defined(PETSC_HAVE_ELEMENTAL) 6102 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 6103 #endif 6104 #if defined(PETSC_HAVE_SCALAPACK) 6105 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr); 6106 #endif 6107 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 6108 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 6109 #if defined(PETSC_HAVE_HYPRE) 6110 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 6111 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr); 6112 #endif 6113 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr); 6114 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr); 6115 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 6116 PetscFunctionReturn(0); 6117 } 6118 6119 /*@C 6120 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 6121 and "off-diagonal" part of the matrix in CSR format. 6122 6123 Collective 6124 6125 Input Parameters: 6126 + comm - MPI communicator 6127 . m - number of local rows (Cannot be PETSC_DECIDE) 6128 . n - This value should be the same as the local size used in creating the 6129 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 6130 calculated if N is given) For square matrices n is almost always m. 6131 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 6132 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 6133 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6134 . j - column indices 6135 . 
a - matrix values 6136 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6137 . oj - column indices 6138 - oa - matrix values 6139 6140 Output Parameter: 6141 . mat - the matrix 6142 6143 Level: advanced 6144 6145 Notes: 6146 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6147 must free the arrays once the matrix has been destroyed and not before. 6148 6149 The i and j indices are 0 based 6150 6151 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6152 6153 This sets local rows and cannot be used to set off-processor values. 6154 6155 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6156 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6157 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6158 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6159 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 6160 communication if it is known that only local entries will be set. 
6161 6162 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 6163 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 6164 @*/ 6165 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 6166 { 6167 PetscErrorCode ierr; 6168 Mat_MPIAIJ *maij; 6169 6170 PetscFunctionBegin; 6171 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6172 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 6173 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 6174 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 6175 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 6176 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 6177 maij = (Mat_MPIAIJ*) (*mat)->data; 6178 6179 (*mat)->preallocated = PETSC_TRUE; 6180 6181 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 6182 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 6183 6184 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 6185 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 6186 6187 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6188 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6189 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6190 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6191 6192 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 6193 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6194 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6195 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 6196 ierr = 
MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6197 PetscFunctionReturn(0); 6198 } 6199 6200 /* 6201 Special version for direct calls from Fortran 6202 */ 6203 #include <petsc/private/fortranimpl.h> 6204 6205 /* Change these macros so can be used in void function */ 6206 #undef CHKERRQ 6207 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 6208 #undef SETERRQ2 6209 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 6210 #undef SETERRQ3 6211 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 6212 #undef SETERRQ 6213 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 6214 6215 #if defined(PETSC_HAVE_FORTRAN_CAPS) 6216 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 6217 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 6218 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 6219 #else 6220 #endif 6221 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 6222 { 6223 Mat mat = *mmat; 6224 PetscInt m = *mm, n = *mn; 6225 InsertMode addv = *maddv; 6226 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 6227 PetscScalar value; 6228 PetscErrorCode ierr; 6229 6230 MatCheckPreallocated(mat,1); 6231 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 6232 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 6233 { 6234 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 6235 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 6236 PetscBool roworiented = aij->roworiented; 6237 6238 /* Some Variables required in the macro */ 6239 Mat A = aij->A; 6240 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 6241 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 6242 MatScalar *aa = a->a; 6243 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? 
PETSC_TRUE : PETSC_FALSE); 6244 Mat B = aij->B; 6245 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 6246 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 6247 MatScalar *ba = b->a; 6248 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 6249 * cannot use "#if defined" inside a macro. */ 6250 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 6251 6252 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 6253 PetscInt nonew = a->nonew; 6254 MatScalar *ap1,*ap2; 6255 6256 PetscFunctionBegin; 6257 for (i=0; i<m; i++) { 6258 if (im[i] < 0) continue; 6259 if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 6260 if (im[i] >= rstart && im[i] < rend) { 6261 row = im[i] - rstart; 6262 lastcol1 = -1; 6263 rp1 = aj + ai[row]; 6264 ap1 = aa + ai[row]; 6265 rmax1 = aimax[row]; 6266 nrow1 = ailen[row]; 6267 low1 = 0; 6268 high1 = nrow1; 6269 lastcol2 = -1; 6270 rp2 = bj + bi[row]; 6271 ap2 = ba + bi[row]; 6272 rmax2 = bimax[row]; 6273 nrow2 = bilen[row]; 6274 low2 = 0; 6275 high2 = nrow2; 6276 6277 for (j=0; j<n; j++) { 6278 if (roworiented) value = v[i*n+j]; 6279 else value = v[i+j*m]; 6280 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 6281 if (in[j] >= cstart && in[j] < cend) { 6282 col = in[j] - cstart; 6283 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 6284 #if defined(PETSC_HAVE_DEVICE) 6285 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 6286 #endif 6287 } else if (in[j] < 0) continue; 6288 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 6289 /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */ 6290 
/*
   MatMatMPIAIJBACKEND - per-product context stored in C->product->data by
   MatProductSymbolic_MPIAIJBACKEND(), read by MatProductNumeric_MPIAIJBACKEND()
   and released by MatDestroy_MatMatMPIAIJBACKEND().

   The product C is assembled from up to MAX_NUMBER_INTERMEDIATE sequential
   intermediate products whose values are pushed into C through the COO interface
   (MatSetPreallocationCOO() / MatSetValuesCOO()).
*/
typedef struct {
  Mat         *mp;    /* intermediate products; array of length cp */
  PetscBool   *mptmp; /* is the intermediate product temporary ? Temporary products are
                         recomputed in the numeric phase but their values are not copied into C */
  PetscInt    cp;     /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s,*startsj_r;
  PetscScalar *bufa;
  Mat         P_oth;

  /* may take advantage of merging product->B */
  Mat         Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage.
     Initialized from product->api_user in the symbolic phase; the numeric phase skips
     refreshing P_oth/Bloc while it is true and then resets it to PETSC_FALSE. */
  PetscBool   reusesym;

  /* support for COO values insertion */
  PetscScalar *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt    **own; /* own[i] points to address of on-process COO indices for Mat mp[i];
                        cp+1 entries, csr-like: own[i+1]-own[i] is the count for mp[i], own[0] is the base of the shared index buffer */
  PetscInt    **off; /* off[i] points to address of off-process COO indices for Mat mp[i];
                        same cp+1 csr-like layout as own[] */
  PetscBool   hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF     sf; /* used for non-local values insertion and memory malloc */
  PetscMemType mtype; /* memory type handed to PetscSFMalloc()/PetscSFFree() for coo_v and coo_w */

  /* customization */
  PetscBool   abmerge;    /* for A*B: multiply A_diag by the merged local part of product->B instead of by its diag and off-diag blocks separately */
  PetscBool   P_oth_bind; /* passed to MatBindToCPU() for P_oth */
} MatMatMPIAIJBACKEND;
PetscFree2(mmdata->mp,mmdata->mptmp);CHKERRQ(ierr); 6387 ierr = PetscFree(mmdata->own[0]);CHKERRQ(ierr); 6388 ierr = PetscFree(mmdata->own);CHKERRQ(ierr); 6389 ierr = PetscFree(mmdata->off[0]);CHKERRQ(ierr); 6390 ierr = PetscFree(mmdata->off);CHKERRQ(ierr); 6391 ierr = PetscFree(mmdata);CHKERRQ(ierr); 6392 PetscFunctionReturn(0); 6393 } 6394 6395 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 6396 { 6397 PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]); 6398 PetscErrorCode ierr; 6399 6400 PetscFunctionBegin; 6401 ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f);CHKERRQ(ierr); 6402 if (f) { 6403 ierr = (*f)(A,n,idx,v);CHKERRQ(ierr); 6404 } else { 6405 const PetscScalar *vv; 6406 6407 ierr = MatSeqAIJGetArrayRead(A,&vv);CHKERRQ(ierr); 6408 if (n && idx) { 6409 PetscScalar *w = v; 6410 const PetscInt *oi = idx; 6411 PetscInt j; 6412 6413 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 6414 } else { 6415 ierr = PetscArraycpy(v,vv,n);CHKERRQ(ierr); 6416 } 6417 ierr = MatSeqAIJRestoreArrayRead(A,&vv);CHKERRQ(ierr); 6418 } 6419 PetscFunctionReturn(0); 6420 } 6421 6422 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 6423 { 6424 MatMatMPIAIJBACKEND *mmdata; 6425 PetscInt i,n_d,n_o; 6426 PetscErrorCode ierr; 6427 6428 PetscFunctionBegin; 6429 MatCheckProduct(C,1); 6430 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 6431 mmdata = (MatMatMPIAIJBACKEND*)C->product->data; 6432 if (!mmdata->reusesym) { /* update temporary matrices */ 6433 if (mmdata->P_oth) { 6434 ierr = MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr); 6435 } 6436 if (mmdata->Bloc) { 6437 ierr = MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc);CHKERRQ(ierr); 6438 } 6439 } 6440 mmdata->reusesym = PETSC_FALSE; 6441 6442 for (i 
= 0; i < mmdata->cp; i++) { 6443 if (!mmdata->mp[i]->ops->productnumeric) SETERRQ1(PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]); 6444 ierr = (*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]);CHKERRQ(ierr); 6445 } 6446 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 6447 PetscInt noff = mmdata->off[i+1] - mmdata->off[i]; 6448 6449 if (mmdata->mptmp[i]) continue; 6450 if (noff) { 6451 PetscInt nown = mmdata->own[i+1] - mmdata->own[i]; 6452 6453 ierr = MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o);CHKERRQ(ierr); 6454 ierr = MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d);CHKERRQ(ierr); 6455 n_o += noff; 6456 n_d += nown; 6457 } else { 6458 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data; 6459 6460 ierr = MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d);CHKERRQ(ierr); 6461 n_d += mm->nz; 6462 } 6463 } 6464 if (mmdata->hasoffproc) { /* offprocess insertion */ 6465 ierr = PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr); 6466 ierr = PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr); 6467 } 6468 ierr = MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES);CHKERRQ(ierr); 6469 PetscFunctionReturn(0); 6470 } 6471 6472 /* Support for Pt * A, A * P, or Pt * A * P */ 6473 #define MAX_NUMBER_INTERMEDIATE 4 6474 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 6475 { 6476 Mat_Product *product = C->product; 6477 Mat A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 6478 Mat_MPIAIJ *a,*p; 6479 MatMatMPIAIJBACKEND *mmdata; 6480 ISLocalToGlobalMapping P_oth_l2g = NULL; 6481 IS glob = NULL; 6482 const char *prefix; 6483 char pprefix[256]; 6484 const PetscInt *globidx,*P_oth_idx; 6485 PetscInt i,j,cp,m,n,M,N,ncoo,ncoo_d,ncoo_o,ncoo_oown,*coo_i,*coo_j; 6486 PetscInt 
cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */ 6487 /* type-0: consecutive, start from 0; type-1: consecutive with */ 6488 /* a base offset; type-2: sparse with a local to global map table */ 6489 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 6490 6491 MatProductType ptype; 6492 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk; 6493 PetscMPIInt size; 6494 PetscErrorCode ierr; 6495 6496 PetscFunctionBegin; 6497 MatCheckProduct(C,1); 6498 if (product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 6499 ptype = product->type; 6500 if (product->A->symmetric && ptype == MATPRODUCT_AtB) ptype = MATPRODUCT_AB; 6501 switch (ptype) { 6502 case MATPRODUCT_AB: 6503 A = product->A; 6504 P = product->B; 6505 m = A->rmap->n; 6506 n = P->cmap->n; 6507 M = A->rmap->N; 6508 N = P->cmap->N; 6509 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 6510 break; 6511 case MATPRODUCT_AtB: 6512 P = product->A; 6513 A = product->B; 6514 m = P->cmap->n; 6515 n = A->cmap->n; 6516 M = P->cmap->N; 6517 N = A->cmap->N; 6518 hasoffproc = PETSC_TRUE; 6519 break; 6520 case MATPRODUCT_PtAP: 6521 A = product->A; 6522 P = product->B; 6523 m = P->cmap->n; 6524 n = P->cmap->n; 6525 M = P->cmap->N; 6526 N = P->cmap->N; 6527 hasoffproc = PETSC_TRUE; 6528 break; 6529 default: 6530 SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 6531 } 6532 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)C),&size);CHKERRMPI(ierr); 6533 if (size == 1) hasoffproc = PETSC_FALSE; 6534 6535 /* defaults */ 6536 for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) { 6537 mp[i] = NULL; 6538 mptmp[i] = PETSC_FALSE; 6539 rmapt[i] = -1; 6540 cmapt[i] = -1; 6541 rmapa[i] = NULL; 6542 cmapa[i] = NULL; 6543 } 6544 6545 /* 
customization */ 6546 ierr = PetscNew(&mmdata);CHKERRQ(ierr); 6547 mmdata->reusesym = product->api_user; 6548 if (ptype == MATPRODUCT_AB) { 6549 if (product->api_user) { 6550 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");CHKERRQ(ierr); 6551 ierr = PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr); 6552 ierr = PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 6553 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6554 } else { 6555 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr); 6556 ierr = PetscOptionsBool("-matproduct_ab_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr); 6557 ierr = PetscOptionsBool("-matproduct_ab_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 6558 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6559 } 6560 } else if (ptype == MATPRODUCT_PtAP) { 6561 if (product->api_user) { 6562 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");CHKERRQ(ierr); 6563 ierr = PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 6564 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6565 } else { 6566 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr); 6567 ierr = PetscOptionsBool("-matproduct_ptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 6568 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6569 } 6570 } 6571 a = (Mat_MPIAIJ*)A->data; 6572 p = (Mat_MPIAIJ*)P->data; 6573 ierr = 
MatSetSizes(C,m,n,M,N);CHKERRQ(ierr); 6574 ierr = PetscLayoutSetUp(C->rmap);CHKERRQ(ierr); 6575 ierr = PetscLayoutSetUp(C->cmap);CHKERRQ(ierr); 6576 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 6577 ierr = MatGetOptionsPrefix(C,&prefix);CHKERRQ(ierr); 6578 6579 cp = 0; 6580 switch (ptype) { 6581 case MATPRODUCT_AB: /* A * P */ 6582 ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr); 6583 6584 /* A_diag * P_local (merged or not) */ 6585 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 6586 /* P is product->B */ 6587 ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr); 6588 ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6589 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6590 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6591 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6592 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6593 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6594 mp[cp]->product->api_user = product->api_user; 6595 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6596 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6597 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6598 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 6599 rmapt[cp] = 1; 6600 cmapt[cp] = 2; 6601 cmapa[cp] = globidx; 6602 mptmp[cp] = PETSC_FALSE; 6603 cp++; 6604 } else { /* A_diag * P_diag and A_diag * P_off */ 6605 ierr = MatProductCreate(a->A,p->A,NULL,&mp[cp]);CHKERRQ(ierr); 6606 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6607 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6608 ierr = 
PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6609 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6610 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6611 mp[cp]->product->api_user = product->api_user; 6612 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6613 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6614 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6615 rmapt[cp] = 1; 6616 cmapt[cp] = 1; 6617 mptmp[cp] = PETSC_FALSE; 6618 cp++; 6619 ierr = MatProductCreate(a->A,p->B,NULL,&mp[cp]);CHKERRQ(ierr); 6620 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6621 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6622 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6623 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6624 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6625 mp[cp]->product->api_user = product->api_user; 6626 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6627 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6628 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6629 rmapt[cp] = 1; 6630 cmapt[cp] = 2; 6631 cmapa[cp] = p->garray; 6632 mptmp[cp] = PETSC_FALSE; 6633 cp++; 6634 } 6635 6636 /* A_off * P_other */ 6637 if (mmdata->P_oth) { 6638 ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr); /* make P_oth use local col ids */ 6639 ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr); 6640 ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr); 6641 ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr); 6642 ierr = 
MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr); 6643 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6644 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6645 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6646 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6647 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6648 mp[cp]->product->api_user = product->api_user; 6649 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6650 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6651 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6652 rmapt[cp] = 1; 6653 cmapt[cp] = 2; 6654 cmapa[cp] = P_oth_idx; 6655 mptmp[cp] = PETSC_FALSE; 6656 cp++; 6657 } 6658 break; 6659 6660 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 6661 /* A is product->B */ 6662 ierr = MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr); 6663 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 6664 ierr = MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6665 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 6666 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6667 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6668 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6669 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6670 mp[cp]->product->api_user = product->api_user; 6671 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6672 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6673 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6674 ierr = 
ISGetIndices(glob,&globidx);CHKERRQ(ierr); 6675 rmapt[cp] = 2; 6676 rmapa[cp] = globidx; 6677 cmapt[cp] = 2; 6678 cmapa[cp] = globidx; 6679 mptmp[cp] = PETSC_FALSE; 6680 cp++; 6681 } else { 6682 ierr = MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6683 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 6684 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6685 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6686 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6687 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6688 mp[cp]->product->api_user = product->api_user; 6689 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6690 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6691 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6692 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 6693 rmapt[cp] = 1; 6694 cmapt[cp] = 2; 6695 cmapa[cp] = globidx; 6696 mptmp[cp] = PETSC_FALSE; 6697 cp++; 6698 ierr = MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6699 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 6700 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6701 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6702 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6703 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6704 mp[cp]->product->api_user = product->api_user; 6705 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6706 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6707 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6708 rmapt[cp] = 2; 6709 rmapa[cp] = p->garray; 6710 cmapt[cp] = 2; 6711 cmapa[cp] = globidx; 6712 
mptmp[cp] = PETSC_FALSE; 6713 cp++; 6714 } 6715 break; 6716 case MATPRODUCT_PtAP: 6717 ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr); 6718 /* P is product->B */ 6719 ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr); 6720 ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6721 ierr = MatProductSetType(mp[cp],MATPRODUCT_PtAP);CHKERRQ(ierr); 6722 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6723 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6724 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6725 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6726 mp[cp]->product->api_user = product->api_user; 6727 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6728 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6729 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6730 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 6731 rmapt[cp] = 2; 6732 rmapa[cp] = globidx; 6733 cmapt[cp] = 2; 6734 cmapa[cp] = globidx; 6735 mptmp[cp] = PETSC_FALSE; 6736 cp++; 6737 if (mmdata->P_oth) { 6738 ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr); 6739 ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr); 6740 ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr); 6741 ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr); 6742 ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr); 6743 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6744 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6745 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6746 ierr = 
MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6747 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6748 mp[cp]->product->api_user = product->api_user; 6749 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6750 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6751 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6752 mptmp[cp] = PETSC_TRUE; 6753 cp++; 6754 ierr = MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]);CHKERRQ(ierr); 6755 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 6756 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6757 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6758 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6759 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6760 mp[cp]->product->api_user = product->api_user; 6761 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6762 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6763 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6764 rmapt[cp] = 2; 6765 rmapa[cp] = globidx; 6766 cmapt[cp] = 2; 6767 cmapa[cp] = P_oth_idx; 6768 mptmp[cp] = PETSC_FALSE; 6769 cp++; 6770 } 6771 break; 6772 default: 6773 SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 6774 } 6775 /* sanity check */ 6776 if (size > 1) for (i = 0; i < cp; i++) if (rmapt[i] == 2 && !hasoffproc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %D",i); 6777 6778 ierr = PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp);CHKERRQ(ierr); 6779 for (i = 0; i < cp; i++) { 6780 mmdata->mp[i] = mp[i]; 6781 mmdata->mptmp[i] = mptmp[i]; 6782 } 6783 mmdata->cp = cp; 6784 C->product->data = mmdata; 6785 
C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 6786 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 6787 6788 /* memory type */ 6789 mmdata->mtype = PETSC_MEMTYPE_HOST; 6790 ierr = PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");CHKERRQ(ierr); 6791 ierr = PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"");CHKERRQ(ierr); 6792 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 6793 // enable the line below MatSeqAIJCopySubArray_SeqAIJKokkos is implemented 6794 //else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_DEVICE; 6795 6796 /* prepare coo coordinates for values insertion */ 6797 6798 /* count total nonzeros of those intermediate seqaij Mats 6799 ncoo_d: # of nonzeros of matrices that do not have offproc entries 6800 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 6801 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 6802 */ 6803 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 6804 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 6805 if (mptmp[cp]) continue; 6806 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 6807 const PetscInt *rmap = rmapa[cp]; 6808 const PetscInt mr = mp[cp]->rmap->n; 6809 const PetscInt rs = C->rmap->rstart; 6810 const PetscInt re = C->rmap->rend; 6811 const PetscInt *ii = mm->i; 6812 for (i = 0; i < mr; i++) { 6813 const PetscInt gr = rmap[i]; 6814 const PetscInt nz = ii[i+1] - ii[i]; 6815 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 6816 else ncoo_oown += nz; /* this row is local */ 6817 } 6818 } else ncoo_d += mm->nz; 6819 } 6820 6821 /* 6822 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 6823 6824 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other 
procs. 6825 6826 off[0] points to a big index array, which is shared by off[1,2,...]. Similarily, for own[0]. 6827 6828 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 6829 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 6830 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 6831 6832 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 6833 Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaing part stores i of nonzeros I will receive. 6834 */ 6835 ierr = PetscCalloc1(mmdata->cp+1,&mmdata->off);CHKERRQ(ierr); /* +1 to make a csr-like data structure */ 6836 ierr = PetscCalloc1(mmdata->cp+1,&mmdata->own);CHKERRQ(ierr); 6837 6838 /* gather (i,j) of nonzeros inserted by remote procs */ 6839 if (hasoffproc) { 6840 PetscSF msf; 6841 PetscInt ncoo2,*coo_i2,*coo_j2; 6842 6843 ierr = PetscMalloc1(ncoo_o,&mmdata->off[0]);CHKERRQ(ierr); 6844 ierr = PetscMalloc1(ncoo_oown,&mmdata->own[0]);CHKERRQ(ierr); 6845 ierr = PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j);CHKERRQ(ierr); /* to collect (i,j) of entries to be sent to others */ 6846 6847 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 6848 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 6849 PetscInt *idxoff = mmdata->off[cp]; 6850 PetscInt *idxown = mmdata->own[cp]; 6851 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 6852 const PetscInt *rmap = rmapa[cp]; 6853 const PetscInt *cmap = cmapa[cp]; 6854 const PetscInt *ii = mm->i; 6855 PetscInt *coi = coo_i + ncoo_o; 6856 PetscInt *coj = coo_j + ncoo_o; 6857 const PetscInt mr = mp[cp]->rmap->n; 6858 const PetscInt rs = C->rmap->rstart; 6859 const PetscInt re = C->rmap->rend; 6860 const PetscInt cs = C->cmap->rstart; 6861 for (i = 0; i < mr; i++) { 6862 const PetscInt *jj = mm->j + ii[i]; 6863 const PetscInt gr = rmap[i]; 6864 const PetscInt nz = ii[i+1] - 
ii[i]; 6865 if (gr < rs || gr >= re) { /* this is an offproc row */ 6866 for (j = ii[i]; j < ii[i+1]; j++) { 6867 *coi++ = gr; 6868 *idxoff++ = j; 6869 } 6870 if (!cmapt[cp]) { /* already global */ 6871 for (j = 0; j < nz; j++) *coj++ = jj[j]; 6872 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 6873 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 6874 } else { /* offdiag */ 6875 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 6876 } 6877 ncoo_o += nz; 6878 } else { /* this is a local row */ 6879 for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j; 6880 } 6881 } 6882 } 6883 mmdata->off[cp + 1] = idxoff; 6884 mmdata->own[cp + 1] = idxown; 6885 } 6886 6887 ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr); 6888 ierr = PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i);CHKERRQ(ierr); 6889 ierr = PetscSFGetMultiSF(mmdata->sf,&msf);CHKERRQ(ierr); 6890 ierr = PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL);CHKERRQ(ierr); 6891 ncoo = ncoo_d + ncoo_oown + ncoo2; 6892 ierr = PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2);CHKERRQ(ierr); 6893 ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); /* put (i,j) of remote nonzeros at back */ 6894 ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); 6895 ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); 6896 ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); 6897 ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr); 6898 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 6899 ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w);CHKERRQ(ierr); 6900 coo_i = coo_i2; 6901 coo_j = coo_j2; 6902 } else { /* no offproc values insertion */ 6903 ncoo = ncoo_d; 6904 ierr = 
PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j);CHKERRQ(ierr); 6905 6906 ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr); 6907 ierr = PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER);CHKERRQ(ierr); 6908 ierr = PetscSFSetUp(mmdata->sf);CHKERRQ(ierr); 6909 } 6910 mmdata->hasoffproc = hasoffproc; 6911 6912 /* gather (i,j) of nonzeros inserted locally */ 6913 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 6914 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 6915 PetscInt *coi = coo_i + ncoo_d; 6916 PetscInt *coj = coo_j + ncoo_d; 6917 const PetscInt *jj = mm->j; 6918 const PetscInt *ii = mm->i; 6919 const PetscInt *cmap = cmapa[cp]; 6920 const PetscInt *rmap = rmapa[cp]; 6921 const PetscInt mr = mp[cp]->rmap->n; 6922 const PetscInt rs = C->rmap->rstart; 6923 const PetscInt re = C->rmap->rend; 6924 const PetscInt cs = C->cmap->rstart; 6925 6926 if (mptmp[cp]) continue; 6927 if (rmapt[cp] == 1) { /* consecutive rows */ 6928 /* fill coo_i */ 6929 for (i = 0; i < mr; i++) { 6930 const PetscInt gr = i + rs; 6931 for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr; 6932 } 6933 /* fill coo_j */ 6934 if (!cmapt[cp]) { /* type-0, already global */ 6935 ierr = PetscArraycpy(coj,jj,mm->nz);CHKERRQ(ierr); 6936 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 6937 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 6938 } else { /* type-2, local to global for sparse columns */ 6939 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 6940 } 6941 ncoo_d += mm->nz; 6942 } else if (rmapt[cp] == 2) { /* sparse rows */ 6943 for (i = 0; i < mr; i++) { 6944 const PetscInt *jj = mm->j + ii[i]; 6945 const PetscInt gr = rmap[i]; 6946 const PetscInt nz = ii[i+1] - ii[i]; 6947 if (gr >= rs && gr < re) { /* local rows */ 6948 for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr; 6949 if (!cmapt[cp]) { /* type-0, already global */ 6950 for (j = 0; j < nz; j++) *coj++ = jj[j]; 6951 } else if (cmapt[cp] 
== 1) { /* local to global for owned columns of C */ 6952 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 6953 } else { /* type-2, local to global for sparse columns */ 6954 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 6955 } 6956 ncoo_d += nz; 6957 } 6958 } 6959 } 6960 } 6961 if (glob) { 6962 ierr = ISRestoreIndices(glob,&globidx);CHKERRQ(ierr); 6963 } 6964 ierr = ISDestroy(&glob);CHKERRQ(ierr); 6965 if (P_oth_l2g) { 6966 ierr = ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr); 6967 } 6968 ierr = ISLocalToGlobalMappingDestroy(&P_oth_l2g);CHKERRQ(ierr); 6969 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 6970 ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v);CHKERRQ(ierr); 6971 6972 /* preallocate with COO data */ 6973 ierr = MatSetPreallocationCOO(C,ncoo,coo_i,coo_j);CHKERRQ(ierr); 6974 ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr); 6975 PetscFunctionReturn(0); 6976 } 6977 6978 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 6979 { 6980 Mat_Product *product = mat->product; 6981 PetscErrorCode ierr; 6982 #if defined(PETSC_HAVE_DEVICE) 6983 PetscBool match = PETSC_FALSE; 6984 PetscBool usecpu = PETSC_FALSE; 6985 #else 6986 PetscBool match = PETSC_TRUE; 6987 #endif 6988 6989 PetscFunctionBegin; 6990 MatCheckProduct(mat,1); 6991 #if defined(PETSC_HAVE_DEVICE) 6992 if (!product->A->boundtocpu && !product->B->boundtocpu) { 6993 ierr = PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match);CHKERRQ(ierr); 6994 } 6995 if (match) { /* we can always fallback to the CPU if requested */ 6996 switch (product->type) { 6997 case MATPRODUCT_AB: 6998 if (product->api_user) { 6999 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");CHKERRQ(ierr); 7000 ierr = PetscOptionsBool("-matmatmult_backend_cpu","Use CPU 
code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7001 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7002 } else { 7003 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr); 7004 ierr = PetscOptionsBool("-matproduct_ab_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7005 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7006 } 7007 break; 7008 case MATPRODUCT_AtB: 7009 if (product->api_user) { 7010 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");CHKERRQ(ierr); 7011 ierr = PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7012 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7013 } else { 7014 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");CHKERRQ(ierr); 7015 ierr = PetscOptionsBool("-matproduct_atb_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7016 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7017 } 7018 break; 7019 case MATPRODUCT_PtAP: 7020 if (product->api_user) { 7021 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");CHKERRQ(ierr); 7022 ierr = PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7023 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7024 } else { 7025 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr); 7026 ierr = PetscOptionsBool("-matproduct_ptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7027 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7028 } 7029 break; 7030 default: 7031 break; 7032 } 7033 match = (PetscBool)!usecpu; 7034 } 7035 #endif 7036 if (match) { 7037 switch (product->type) { 7038 case MATPRODUCT_AB: 7039 case MATPRODUCT_AtB: 7040 
case MATPRODUCT_PtAP: 7041 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7042 break; 7043 default: 7044 break; 7045 } 7046 } 7047 /* fallback to MPIAIJ ops */ 7048 if (!mat->ops->productsymbolic) { 7049 ierr = MatProductSetFromOptions_MPIAIJ(mat);CHKERRQ(ierr); 7050 } 7051 PetscFunctionReturn(0); 7052 } 7053