#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes:
   Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also
   automatically switches over to using inodes when enough exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) {
    ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
  }
  if (a->B) {
    ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
  }

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from the parent matrix.
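   * Note that a->A, a->B, and a->lvec may not exist yet if the matrix has not been assembled;
   * in that case MatAssemblyEnd_MPIAIJ() re-applies the binding once the blocks have been created.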
*/ 66 if (a->lvec) { 67 ierr = VecBindToCPU(a->lvec,flg);CHKERRQ(ierr); 68 } 69 if (a->diag) { 70 ierr = VecBindToCPU(a->diag,flg);CHKERRQ(ierr); 71 } 72 73 PetscFunctionReturn(0); 74 } 75 76 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 77 { 78 PetscErrorCode ierr; 79 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 80 81 PetscFunctionBegin; 82 if (mat->A) { 83 ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr); 84 ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr); 85 } 86 PetscFunctionReturn(0); 87 } 88 89 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 90 { 91 PetscErrorCode ierr; 92 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 93 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 94 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 95 const PetscInt *ia,*ib; 96 const MatScalar *aa,*bb,*aav,*bav; 97 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 98 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 99 100 PetscFunctionBegin; 101 *keptrows = NULL; 102 103 ia = a->i; 104 ib = b->i; 105 ierr = MatSeqAIJGetArrayRead(mat->A,&aav);CHKERRQ(ierr); 106 ierr = MatSeqAIJGetArrayRead(mat->B,&bav);CHKERRQ(ierr); 107 for (i=0; i<m; i++) { 108 na = ia[i+1] - ia[i]; 109 nb = ib[i+1] - ib[i]; 110 if (!na && !nb) { 111 cnt++; 112 goto ok1; 113 } 114 aa = aav + ia[i]; 115 for (j=0; j<na; j++) { 116 if (aa[j] != 0.0) goto ok1; 117 } 118 bb = bav + ib[i]; 119 for (j=0; j <nb; j++) { 120 if (bb[j] != 0.0) goto ok1; 121 } 122 cnt++; 123 ok1:; 124 } 125 ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRMPI(ierr); 126 if (!n0rows) { 127 ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr); 128 ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr); 129 PetscFunctionReturn(0); 130 } 131 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 132 cnt = 0; 133 for (i=0; i<m; i++) { 134 na = ia[i+1] - ia[i]; 135 nb = ib[i+1] - ib[i]; 136 if (!na && !nb) continue; 137 aa = aav + ia[i]; 138 for (j=0; j<na;j++) { 139 if (aa[j] != 0.0) { 140 rows[cnt++] = rstart + i; 141 goto ok2; 142 } 143 } 144 bb = bav + ib[i]; 145 for (j=0; j<nb; j++) { 146 if (bb[j] != 0.0) { 147 rows[cnt++] = rstart + i; 148 goto ok2; 149 } 150 } 151 ok2:; 152 } 153 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 154 ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr); 155 ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr); 156 PetscFunctionReturn(0); 157 } 158 159 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 160 { 161 PetscErrorCode ierr; 162 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 163 PetscBool cong; 164 165 PetscFunctionBegin; 166 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 167 if (Y->assembled && cong) { 168 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 169 } else { 170 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 171 } 172 PetscFunctionReturn(0); 173 } 174 175 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 176 { 177 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 178 PetscErrorCode ierr; 179 PetscInt i,rstart,nrows,*rows; 180 181 PetscFunctionBegin; 182 *zrows = NULL; 183 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 184 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 185 for (i=0; i<nrows; i++) rows[i] += rstart; 186 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 187 PetscFunctionReturn(0); 188 } 189 190 PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat 
A,PetscInt type,PetscReal *reductions) 191 { 192 PetscErrorCode ierr; 193 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 194 PetscInt i,m,n,*garray = aij->garray; 195 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 196 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 197 PetscReal *work; 198 const PetscScalar *dummy; 199 200 PetscFunctionBegin; 201 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 202 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 203 ierr = MatSeqAIJGetArrayRead(aij->A,&dummy);CHKERRQ(ierr); 204 ierr = MatSeqAIJRestoreArrayRead(aij->A,&dummy);CHKERRQ(ierr); 205 ierr = MatSeqAIJGetArrayRead(aij->B,&dummy);CHKERRQ(ierr); 206 ierr = MatSeqAIJRestoreArrayRead(aij->B,&dummy);CHKERRQ(ierr); 207 if (type == NORM_2) { 208 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 209 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 210 } 211 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 212 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 213 } 214 } else if (type == NORM_1) { 215 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 216 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 217 } 218 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 219 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 220 } 221 } else if (type == NORM_INFINITY) { 222 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 223 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 224 } 225 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 226 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 227 } 228 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 229 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 230 work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]); 231 } 232 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 233 work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]); 234 } 235 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 236 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 237 work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]); 238 } 239 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 240 work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]); 241 } 242 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type"); 243 if (type == NORM_INFINITY) { 244 ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 245 } else { 246 ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 247 } 248 ierr = PetscFree(work);CHKERRQ(ierr); 249 if (type == NORM_2) { 250 for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 251 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 252 for (i=0; i<n; i++) reductions[i] /= m; 253 } 254 PetscFunctionReturn(0); 255 } 256 257 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 258 { 259 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 260 IS sis,gis; 261 PetscErrorCode ierr; 262 const PetscInt *isis,*igis; 263 PetscInt n,*iis,nsis,ngis,rstart,i; 264 265 PetscFunctionBegin; 266 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 267 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 268 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 269 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 270 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 271 ierr = 
ISGetIndices(gis,&igis);CHKERRQ(ierr);

  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
  ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each process
  holds an order-N integer array, although access is fast).
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  PetscCheckFalse(n && !aij->garray,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) { \
            ap1[_i] += value; \
            /* Not sure whether PetscLogFlops() will slow down the code or not */ \
            (void)PetscLogFlops(1.0); \
          } \
          else ap1[_i] = value; \
          goto a_noinsert; \
        } \
      } \
      if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
      PetscCheckFalse(nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
      ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
      rp1[_i] = col;  \
      ap1[_i] = value; \
      A->nonzerostate++;\
      a_noinsert: ; \
      ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol)     \
{ \
    if (col <= lastcol2) low2 = 0;      \
    else                high2 = nrow2;  \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
360 t = (low2+high2)/2; \ 361 if (rp2[t] > col) high2 = t; \ 362 else low2 = t; \ 363 } \ 364 for (_i=low2; _i<high2; _i++) { \ 365 if (rp2[_i] > col) break; \ 366 if (rp2[_i] == col) { \ 367 if (addv == ADD_VALUES) { \ 368 ap2[_i] += value; \ 369 (void)PetscLogFlops(1.0); \ 370 } \ 371 else ap2[_i] = value; \ 372 goto b_noinsert; \ 373 } \ 374 } \ 375 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 376 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 377 PetscCheckFalse(nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 378 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 379 N = nrow2++ - 1; b->nz++; high2++; \ 380 /* shift up all the later entries in this row */ \ 381 ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\ 382 ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\ 383 rp2[_i] = col; \ 384 ap2[_i] = value; \ 385 B->nonzerostate++; \ 386 b_noinsert: ; \ 387 bilen[row] = nrow2; \ 388 } 389 390 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 391 { 392 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 393 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 394 PetscErrorCode ierr; 395 PetscInt l,*garray = mat->garray,diag; 396 PetscScalar *aa,*ba; 397 398 PetscFunctionBegin; 399 /* code only works for square matrices A */ 400 401 /* find size of row to the left of the diagonal part */ 402 ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr); 403 row = row - diag; 404 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 405 if (garray[b->j[b->i[row]+l]] > diag) break; 406 } 407 if (l) { 408 ierr = MatSeqAIJGetArray(mat->B,&ba);CHKERRQ(ierr); 409 ierr = PetscArraycpy(ba+b->i[row],v,l);CHKERRQ(ierr); 410 ierr = MatSeqAIJRestoreArray(mat->B,&ba);CHKERRQ(ierr); 411 } 412 413 /* diagonal part */ 414 if (a->i[row+1]-a->i[row]) { 415 ierr = MatSeqAIJGetArray(mat->A,&aa);CHKERRQ(ierr); 416 ierr = PetscArraycpy(aa+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr); 417 ierr = MatSeqAIJRestoreArray(mat->A,&aa);CHKERRQ(ierr); 418 } 419 420 /* right of diagonal part */ 421 if (b->i[row+1]-b->i[row]-l) { 422 ierr = MatSeqAIJGetArray(mat->B,&ba);CHKERRQ(ierr); 423 ierr = PetscArraycpy(ba+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr); 424 ierr = MatSeqAIJRestoreArray(mat->B,&ba);CHKERRQ(ierr); 425 } 426 PetscFunctionReturn(0); 427 } 428 429 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 430 { 431 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 432 PetscScalar value = 0.0; 433 PetscErrorCode ierr; 434 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 435 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 436 PetscBool roworiented = aij->roworiented; 437 438 /* Some Variables required in the macro */ 439 Mat A = aij->A; 440 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 441 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 442 PetscBool ignorezeroentries = a->ignorezeroentries; 443 Mat B = aij->B; 444 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 445 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 446 MatScalar *aa,*ba; 447 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 448 
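  /* rp1/ap1 and rp2/ap2 point at the column indices and values of the current row of the diagonal
     block A and the off-diagonal block B respectively; low/high bracket the column search and
     lastcol caches the previously inserted column, as used by the
     MatSetValues_SeqAIJ_A_Private()/MatSetValues_SeqAIJ_B_Private() macros invoked below. */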
PetscInt nonew; 449 MatScalar *ap1,*ap2; 450 451 PetscFunctionBegin; 452 ierr = MatSeqAIJGetArray(A,&aa);CHKERRQ(ierr); 453 ierr = MatSeqAIJGetArray(B,&ba);CHKERRQ(ierr); 454 for (i=0; i<m; i++) { 455 if (im[i] < 0) continue; 456 PetscCheckFalse(im[i] >= mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1); 457 if (im[i] >= rstart && im[i] < rend) { 458 row = im[i] - rstart; 459 lastcol1 = -1; 460 rp1 = aj + ai[row]; 461 ap1 = aa + ai[row]; 462 rmax1 = aimax[row]; 463 nrow1 = ailen[row]; 464 low1 = 0; 465 high1 = nrow1; 466 lastcol2 = -1; 467 rp2 = bj + bi[row]; 468 ap2 = ba + bi[row]; 469 rmax2 = bimax[row]; 470 nrow2 = bilen[row]; 471 low2 = 0; 472 high2 = nrow2; 473 474 for (j=0; j<n; j++) { 475 if (v) value = roworiented ? v[i*n+j] : v[i+j*m]; 476 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 477 if (in[j] >= cstart && in[j] < cend) { 478 col = in[j] - cstart; 479 nonew = a->nonew; 480 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 481 } else if (in[j] < 0) continue; 482 else PetscCheckFalse(in[j] >= mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1); 483 else { 484 if (mat->was_assembled) { 485 if (!aij->colmap) { 486 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 487 } 488 #if defined(PETSC_USE_CTABLE) 489 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); /* map global col ids to local ones */ 490 col--; 491 #else 492 col = aij->colmap[in[j]] - 1; 493 #endif 494 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */ 495 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); /* Change aij->B from reduced/local format to expanded/global format */ 496 col = in[j]; 497 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 498 B = aij->B; 499 b = (Mat_SeqAIJ*)B->data; 500 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 501 rp2 = bj + bi[row]; 502 ap2 = ba + bi[row]; 503 rmax2 = bimax[row]; 504 nrow2 = bilen[row]; 505 low2 = 0; 506 high2 = nrow2; 507 bm = aij->B->rmap->n; 508 ba = b->a; 509 } else if (col < 0 && !(ignorezeroentries && value == 0.0)) { 510 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 511 ierr = PetscInfo(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 512 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 513 } 514 } else col = in[j]; 515 nonew = b->nonew; 516 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 517 } 518 } 519 } else { 520 PetscCheckFalse(mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 521 if (!aij->donotstash) { 522 mat->assembled = PETSC_FALSE; 523 if (roworiented) { 524 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 525 } else { 526 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 527 } 528 } 529 } 530 } 531 ierr = MatSeqAIJRestoreArray(A,&aa);CHKERRQ(ierr); 532 
ierr = MatSeqAIJRestoreArray(B,&ba);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This function sets the j and ilen arrays (of the diagonal and off-diagonal parts) of an MPIAIJ matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-process parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat        A    = aij->A; /* diagonal part of the matrix */
  Mat        B    = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *a   = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt   *ailen = a->ilen,*aj = a->j;
  PetscInt   *bilen = b->ilen,*bj = b->j;
  PetscInt   am     = aij->A->rmap->n,j;
  PetscInt   diag_so_far = 0,dnz;
  PetscInt   offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal parts) of an MPIAIJ matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-process parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
  Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
  would not hold and the more complex MatSetValues_MPIAIJ() has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat        A    = aij->A; /* diagonal part of the matrix */
  Mat        B    = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *aijd = (Mat_SeqAIJ*)(aij->A)->data,*aijo = (Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ *a   = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt   *ailen = a->ilen,*aj = a->j;
  PetscInt   *bilen = b->ilen,*bj = b->j;
  PetscInt   am     = aij->A->rmap->n,j;
  PetscInt   *full_diag_i = aijd->i,*full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point.
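     full_diag_i and full_offd_i are the preallocated CSR row-start arrays of the diagonal block (A) and the
     off-diagonal block (B), so rowstart_diag and rowstart_offd below index directly into aj/aa and bj/ba.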
*/ 596 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 597 PetscScalar *aa = a->a,*ba = b->a; 598 599 PetscFunctionBegin; 600 /* Iterate over all rows of the matrix */ 601 for (j=0; j<am; j++) { 602 dnz_row = onz_row = 0; 603 rowstart_offd = full_offd_i[j]; 604 rowstart_diag = full_diag_i[j]; 605 /* Iterate over all non-zero columns of the current row */ 606 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 607 /* If column is in the diagonal */ 608 if (mat_j[col] >= cstart && mat_j[col] < cend) { 609 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 610 aa[rowstart_diag+dnz_row] = mat_a[col]; 611 dnz_row++; 612 } else { /* off-diagonal entries */ 613 bj[rowstart_offd+onz_row] = mat_j[col]; 614 ba[rowstart_offd+onz_row] = mat_a[col]; 615 onz_row++; 616 } 617 } 618 ailen[j] = dnz_row; 619 bilen[j] = onz_row; 620 } 621 PetscFunctionReturn(0); 622 } 623 624 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 625 { 626 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 627 PetscErrorCode ierr; 628 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 629 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 630 631 PetscFunctionBegin; 632 for (i=0; i<m; i++) { 633 if (idxm[i] < 0) continue; /* negative row */ 634 PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1); 635 if (idxm[i] >= rstart && idxm[i] < rend) { 636 row = idxm[i] - rstart; 637 for (j=0; j<n; j++) { 638 if (idxn[j] < 0) continue; /* negative column */ 639 PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1); 640 if (idxn[j] >= cstart && idxn[j] < cend) { 641 col = idxn[j] - cstart; 642 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 643 } else { 644 if (!aij->colmap) { 645 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 646 } 647 #if defined(PETSC_USE_CTABLE) 648 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 649 col--; 650 #else 651 col = aij->colmap[idxn[j]] - 1; 652 #endif 653 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 654 else { 655 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 656 } 657 } 658 } 659 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 660 } 661 PetscFunctionReturn(0); 662 } 663 664 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 665 { 666 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 667 PetscErrorCode ierr; 668 PetscInt nstash,reallocs; 669 670 PetscFunctionBegin; 671 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 672 673 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 674 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 675 ierr = PetscInfo(aij->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 676 PetscFunctionReturn(0); 677 } 678 679 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 680 { 681 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 682 PetscErrorCode ierr; 683 PetscMPIInt n; 684 PetscInt i,j,rstart,ncols,flg; 685 PetscInt *row,*col; 686 PetscBool other_disassembled; 687 PetscScalar *val; 688 689 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 690 691 PetscFunctionBegin; 692 if 
(!aij->donotstash && !mat->nooffprocentries) { 693 while (1) { 694 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 695 if (!flg) break; 696 697 for (i=0; i<n;) { 698 /* Now identify the consecutive vals belonging to the same row */ 699 for (j=i,rstart=row[j]; j<n; j++) { 700 if (row[j] != rstart) break; 701 } 702 if (j < n) ncols = j-i; 703 else ncols = n-i; 704 /* Now assemble all these values with a single function call */ 705 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 706 i = j; 707 } 708 } 709 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 710 } 711 #if defined(PETSC_HAVE_DEVICE) 712 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 713 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 714 if (mat->boundtocpu) { 715 ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr); 716 ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr); 717 } 718 #endif 719 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 720 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 721 722 /* determine if any processor has disassembled, if so we must 723 also disassemble ourself, in order that we may reassemble. */ 724 /* 725 if nonzero structure of submatrix B cannot change then we know that 726 no processor disassembled thus we can skip this stuff 727 */ 728 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 729 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 730 if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globaly it does not */ 731 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 732 } 733 } 734 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 735 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 736 } 737 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 738 #if defined(PETSC_HAVE_DEVICE) 739 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 740 #endif 741 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 742 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 743 744 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 745 746 aij->rowvalues = NULL; 747 748 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 749 750 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 751 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 752 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 753 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 754 } 755 #if defined(PETSC_HAVE_DEVICE) 756 mat->offloadmask = PETSC_OFFLOAD_BOTH; 757 #endif 758 PetscFunctionReturn(0); 759 } 760 761 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 762 { 763 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 764 PetscErrorCode ierr; 765 766 PetscFunctionBegin; 767 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 768 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 769 PetscFunctionReturn(0); 770 } 771 772 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 773 { 774 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 775 PetscObjectState sA, sB; 776 PetscInt 
*lrows; 777 PetscInt r, len; 778 PetscBool cong, lch, gch; 779 PetscErrorCode ierr; 780 781 PetscFunctionBegin; 782 /* get locally owned rows */ 783 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 784 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 785 /* fix right hand side if needed */ 786 if (x && b) { 787 const PetscScalar *xx; 788 PetscScalar *bb; 789 790 PetscCheckFalse(!cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 791 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 792 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 793 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 794 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 795 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 796 } 797 798 sA = mat->A->nonzerostate; 799 sB = mat->B->nonzerostate; 800 801 if (diag != 0.0 && cong) { 802 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 803 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 804 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 805 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 806 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 807 PetscInt nnwA, nnwB; 808 PetscBool nnzA, nnzB; 809 810 nnwA = aijA->nonew; 811 nnwB = aijB->nonew; 812 nnzA = aijA->keepnonzeropattern; 813 nnzB = aijB->keepnonzeropattern; 814 if (!nnzA) { 815 ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr); 816 aijA->nonew = 0; 817 } 818 if (!nnzB) { 819 ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr); 820 aijB->nonew = 0; 821 } 822 /* Must zero here before the next loop */ 823 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 824 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 825 for (r = 0; r < len; ++r) { 826 const PetscInt row = lrows[r] + A->rmap->rstart; 827 if (row >= A->cmap->N) continue; 828 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 829 } 830 aijA->nonew = nnwA; 831 aijB->nonew = nnwB; 832 } else { 833 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 834 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 835 } 836 ierr = PetscFree(lrows);CHKERRQ(ierr); 837 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 838 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 839 840 /* reduce nonzerostate */ 841 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 842 ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 843 if (gch) A->nonzerostate++; 844 PetscFunctionReturn(0); 845 } 846 847 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 848 { 849 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 850 PetscErrorCode ierr; 851 PetscMPIInt n = A->rmap->n; 852 PetscInt i,j,r,m,len = 0; 853 PetscInt *lrows,*owners = A->rmap->range; 854 PetscMPIInt p = 0; 855 PetscSFNode *rrows; 856 PetscSF sf; 857 const PetscScalar *xx; 858 PetscScalar *bb,*mask,*aij_a; 859 Vec xmask,lmask; 860 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 861 const PetscInt *aj, *ii,*ridx; 862 PetscScalar *aa; 863 864 PetscFunctionBegin; 865 /* Create SF where leaves are input rows and roots are owned 
rows */ 866 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 867 for (r = 0; r < n; ++r) lrows[r] = -1; 868 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 869 for (r = 0; r < N; ++r) { 870 const PetscInt idx = rows[r]; 871 PetscCheckFalse(idx < 0 || A->rmap->N <= idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N); 872 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 873 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 874 } 875 rrows[r].rank = p; 876 rrows[r].index = rows[r] - owners[p]; 877 } 878 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 879 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 880 /* Collect flags for rows to be zeroed */ 881 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 882 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 883 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 884 /* Compress and put in row numbers */ 885 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 886 /* zero diagonal part of matrix */ 887 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 888 /* handle off diagonal part of matrix */ 889 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 890 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 891 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 892 for (i=0; i<len; i++) bb[lrows[i]] = 1; 893 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 894 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 895 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 896 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 897 if (x && b) { /* this code is buggy when the row and column layout don't match */ 898 PetscBool cong; 899 900 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 901 PetscCheckFalse(!cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 902 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 903 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 904 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 905 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 906 } 907 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 908 /* remove zeroed rows of off diagonal matrix */ 909 ierr = MatSeqAIJGetArray(l->B,&aij_a);CHKERRQ(ierr); 910 ii = aij->i; 911 for (i=0; i<len; i++) { 912 ierr = PetscArrayzero(aij_a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr); 913 } 914 /* loop over all elements of off process part of matrix zeroing removed columns*/ 915 if (aij->compressedrow.use) { 916 m = aij->compressedrow.nrows; 917 ii = aij->compressedrow.i; 918 ridx = aij->compressedrow.rindex; 919 for (i=0; i<m; i++) { 920 n = ii[i+1] - ii[i]; 921 aj = aij->j + ii[i]; 922 aa = aij_a + ii[i]; 923 924 for (j=0; j<n; j++) { 925 if (PetscAbsScalar(mask[*aj])) { 926 if (b) bb[*ridx] -= *aa*xx[*aj]; 927 *aa = 0.0; 928 } 929 aa++; 930 aj++; 931 } 932 ridx++; 933 } 934 } else { /* do not use compressed row format */ 935 m = l->B->rmap->n; 936 for (i=0; i<m; i++) { 937 n = ii[i+1] - ii[i]; 938 aj = aij->j + ii[i]; 939 aa = aij_a + ii[i]; 940 for (j=0; j<n; j++) { 941 if (PetscAbsScalar(mask[*aj])) { 942 if (b) bb[i] -= *aa*xx[*aj]; 943 *aa = 0.0; 944 } 945 aa++; 946 aj++; 947 } 948 } 949 } 950 if (x && b) { 951 ierr = 
VecRestoreArray(b,&bb);CHKERRQ(ierr); 952 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 953 } 954 ierr = MatSeqAIJRestoreArray(l->B,&aij_a);CHKERRQ(ierr); 955 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 956 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 957 ierr = PetscFree(lrows);CHKERRQ(ierr); 958 959 /* only change matrix nonzero state if pattern was allowed to be changed */ 960 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 961 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 962 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 963 } 964 PetscFunctionReturn(0); 965 } 966 967 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 968 { 969 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 970 PetscErrorCode ierr; 971 PetscInt nt; 972 VecScatter Mvctx = a->Mvctx; 973 974 PetscFunctionBegin; 975 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 976 PetscCheckFalse(nt != A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")",A->cmap->n,nt); 977 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 978 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 979 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 980 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 981 PetscFunctionReturn(0); 982 } 983 984 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 985 { 986 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 987 PetscErrorCode ierr; 988 989 PetscFunctionBegin; 990 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 991 PetscFunctionReturn(0); 992 } 993 994 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 995 { 996 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 997 PetscErrorCode ierr; 998 VecScatter Mvctx = a->Mvctx; 999 1000 PetscFunctionBegin; 1001 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1002 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1003 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1004 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1005 PetscFunctionReturn(0); 1006 } 1007 1008 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1009 { 1010 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1011 PetscErrorCode ierr; 1012 1013 PetscFunctionBegin; 1014 /* do nondiagonal part */ 1015 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1016 /* do local part */ 1017 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1018 /* add partial results together */ 1019 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1020 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1021 PetscFunctionReturn(0); 1022 } 1023 1024 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1025 { 1026 MPI_Comm comm; 1027 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1028 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1029 IS Me,Notme; 1030 PetscErrorCode ierr; 1031 PetscInt M,N,first,last,*notme,i; 1032 PetscBool lf; 1033 PetscMPIInt size; 1034 1035 PetscFunctionBegin; 1036 /* Easy test: symmetric diagonal block */ 1037 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1038 ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr); 1039 ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRMPI(ierr); 1040 if (!*f) 
PetscFunctionReturn(0); 1041 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1042 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 1043 if (size == 1) PetscFunctionReturn(0); 1044 1045 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1046 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1047 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1048 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1049 for (i=0; i<first; i++) notme[i] = i; 1050 for (i=last; i<M; i++) notme[i-last+first] = i; 1051 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1052 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1053 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1054 Aoff = Aoffs[0]; 1055 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1056 Boff = Boffs[0]; 1057 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1058 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1059 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1060 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1061 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1062 ierr = PetscFree(notme);CHKERRQ(ierr); 1063 PetscFunctionReturn(0); 1064 } 1065 1066 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1067 { 1068 PetscErrorCode ierr; 1069 1070 PetscFunctionBegin; 1071 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1072 PetscFunctionReturn(0); 1073 } 1074 1075 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1076 { 1077 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1078 PetscErrorCode ierr; 1079 1080 PetscFunctionBegin; 1081 /* do nondiagonal part */ 1082 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1083 /* do local part */ 1084 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1085 /* add partial results together */ 1086 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1087 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1088 PetscFunctionReturn(0); 1089 } 1090 1091 /* 1092 This only works correctly for square matrices where the subblock A->A is the 1093 diagonal block 1094 */ 1095 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1096 { 1097 PetscErrorCode ierr; 1098 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1099 1100 PetscFunctionBegin; 1101 PetscCheckFalse(A->rmap->N != A->cmap->N,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1102 PetscCheckFalse(A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1103 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1104 PetscFunctionReturn(0); 1105 } 1106 1107 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1108 { 1109 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1110 PetscErrorCode ierr; 1111 1112 PetscFunctionBegin; 1113 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1114 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1115 PetscFunctionReturn(0); 1116 } 1117 1118 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1119 { 1120 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1121 PetscErrorCode ierr; 1122 1123 PetscFunctionBegin; 1124 #if defined(PETSC_USE_LOG) 1125 PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N); 1126 #endif 1127 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1128 ierr 
= VecDestroy(&aij->diag);CHKERRQ(ierr); 1129 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1130 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1131 #if defined(PETSC_USE_CTABLE) 1132 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1133 #else 1134 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1135 #endif 1136 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1137 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1138 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1139 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1140 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1141 1142 /* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */ 1143 ierr = PetscSFDestroy(&aij->coo_sf);CHKERRQ(ierr); 1144 ierr = PetscFree4(aij->Aperm1,aij->Bperm1,aij->Ajmap1,aij->Bjmap1);CHKERRQ(ierr); 1145 ierr = PetscFree4(aij->Aperm2,aij->Bperm2,aij->Ajmap2,aij->Bjmap2);CHKERRQ(ierr); 1146 ierr = PetscFree4(aij->Aimap1,aij->Bimap1,aij->Aimap2,aij->Bimap2);CHKERRQ(ierr); 1147 ierr = PetscFree2(aij->sendbuf,aij->recvbuf);CHKERRQ(ierr); 1148 ierr = PetscFree(aij->Cperm1);CHKERRQ(ierr); 1149 1150 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1151 1152 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 1153 ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr); 1154 1155 ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr); 1156 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1157 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1158 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1159 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1160 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1161 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1162 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1163 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr); 1164 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1165 #if defined(PETSC_HAVE_CUDA) 1166 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL);CHKERRQ(ierr); 1167 #endif 1168 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 1169 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL);CHKERRQ(ierr); 1170 #endif 1171 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 1172 #if defined(PETSC_HAVE_ELEMENTAL) 1173 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1174 #endif 1175 #if defined(PETSC_HAVE_SCALAPACK) 1176 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr); 1177 #endif 1178 #if defined(PETSC_HAVE_HYPRE) 1179 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1180 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1181 #endif 1182 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1183 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr); 1184 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1185 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL);CHKERRQ(ierr); 1186 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL);CHKERRQ(ierr); 1187 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL);CHKERRQ(ierr); 1188 #if defined(PETSC_HAVE_MKL_SPARSE) 1189 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);CHKERRQ(ierr); 1190 #endif 1191 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);CHKERRQ(ierr); 1192 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1193 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);CHKERRQ(ierr); 1194 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetPreallocationCOO_C",NULL);CHKERRQ(ierr); 1195 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetValuesCOO_C",NULL);CHKERRQ(ierr); 1196 PetscFunctionReturn(0); 1197 } 1198 1199 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1200 { 1201 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1202 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1203 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1204 const PetscInt *garray = aij->garray; 1205 const PetscScalar *aa,*ba; 1206 PetscInt header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb; 1207 PetscInt *rowlens; 1208 PetscInt *colidxs; 1209 PetscScalar *matvals; 1210 PetscErrorCode ierr; 1211 1212 PetscFunctionBegin; 1213 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1214 1215 M = mat->rmap->N; 1216 N = mat->cmap->N; 1217 m = mat->rmap->n; 1218 rs = mat->rmap->rstart; 1219 cs = mat->cmap->rstart; 1220 nz = A->nz + B->nz; 1221 1222 /* write matrix header */ 1223 header[0] = MAT_FILE_CLASSID; 1224 header[1] = M; header[2] = N; header[3] = nz; 1225 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1226 ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 1227 1228 /* fill in and store row lengths */ 1229 ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1230 for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1231 ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr); 1232 ierr = PetscFree(rowlens);CHKERRQ(ierr); 1233 1234 /* fill in and store column indices */ 1235 ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1236 for (cnt=0, i=0; i<m; i++) { 1237 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1238 if (garray[B->j[jb]] > cs) break; 1239 colidxs[cnt++] = garray[B->j[jb]]; 1240 } 1241 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1242 colidxs[cnt++] = A->j[ja] + cs; 1243 for (; jb<B->i[i+1]; jb++) 1244 colidxs[cnt++] = garray[B->j[jb]]; 1245 } 1246 PetscCheckFalse(cnt != nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 1247 ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 1248 ierr = PetscFree(colidxs);CHKERRQ(ierr); 1249 1250 /* fill in and store nonzero values */ 1251 ierr = MatSeqAIJGetArrayRead(aij->A,&aa);CHKERRQ(ierr); 1252 ierr = MatSeqAIJGetArrayRead(aij->B,&ba);CHKERRQ(ierr); 1253 ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1254 for 
(cnt=0, i=0; i<m; i++) { 1255 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1256 if (garray[B->j[jb]] > cs) break; 1257 matvals[cnt++] = ba[jb]; 1258 } 1259 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1260 matvals[cnt++] = aa[ja]; 1261 for (; jb<B->i[i+1]; jb++) 1262 matvals[cnt++] = ba[jb]; 1263 } 1264 ierr = MatSeqAIJRestoreArrayRead(aij->A,&aa);CHKERRQ(ierr); 1265 ierr = MatSeqAIJRestoreArrayRead(aij->B,&ba);CHKERRQ(ierr); 1266 PetscCheckFalse(cnt != nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 1267 ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 1268 ierr = PetscFree(matvals);CHKERRQ(ierr); 1269 1270 /* write block size option to the viewer's .info file */ 1271 ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 1272 PetscFunctionReturn(0); 1273 } 1274 1275 #include <petscdraw.h> 1276 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1277 { 1278 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1279 PetscErrorCode ierr; 1280 PetscMPIInt rank = aij->rank,size = aij->size; 1281 PetscBool isdraw,iascii,isbinary; 1282 PetscViewer sviewer; 1283 PetscViewerFormat format; 1284 1285 PetscFunctionBegin; 1286 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1287 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1288 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1289 if (iascii) { 1290 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1291 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1292 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1293 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1294 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1295 for (i=0; i<(PetscInt)size; i++) { 1296 nmax = PetscMax(nmax,nz[i]); 1297 nmin = PetscMin(nmin,nz[i]); 1298 navg += nz[i]; 1299 } 1300 ierr = PetscFree(nz);CHKERRQ(ierr); 1301 navg = navg/size; 1302 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n",nmin,navg,nmax);CHKERRQ(ierr); 1303 PetscFunctionReturn(0); 1304 } 1305 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1306 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1307 MatInfo info; 1308 PetscInt *inodes=NULL; 1309 1310 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr); 1311 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1312 ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr); 1313 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1314 if (!inodes) { 1315 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", 1316 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1317 } else { 1318 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", 1319 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1320 } 1321 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1322 ierr = 
PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1323 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1324 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1325 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1326 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1327 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1328 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1329 PetscFunctionReturn(0); 1330 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1331 PetscInt inodecount,inodelimit,*inodes; 1332 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1333 if (inodes) { 1334 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n",inodecount,inodelimit);CHKERRQ(ierr); 1335 } else { 1336 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1337 } 1338 PetscFunctionReturn(0); 1339 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1340 PetscFunctionReturn(0); 1341 } 1342 } else if (isbinary) { 1343 if (size == 1) { 1344 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1345 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1346 } else { 1347 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1348 } 1349 PetscFunctionReturn(0); 1350 } else if (iascii && size == 1) { 1351 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1352 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1353 PetscFunctionReturn(0); 1354 } else if (isdraw) { 1355 PetscDraw draw; 1356 PetscBool isnull; 1357 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1358 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1359 if (isnull) PetscFunctionReturn(0); 1360 } 1361 1362 { /* assemble the entire matrix onto first processor */ 1363 Mat A = NULL, Av; 1364 IS isrow,iscol; 1365 1366 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1367 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1368 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1369 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1370 /* The commented code uses MatCreateSubMatrices instead */ 1371 /* 1372 Mat *AA, A = NULL, Av; 1373 IS isrow,iscol; 1374 1375 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1376 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? 
mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1377 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1378 if (rank == 0) { 1379 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1380 A = AA[0]; 1381 Av = AA[0]; 1382 } 1383 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1384 */ 1385 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1386 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1387 /* 1388 Everyone has to call to draw the matrix since the graphics waits are 1389 synchronized across all processors that share the PetscDraw object 1390 */ 1391 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1392 if (rank == 0) { 1393 if (((PetscObject)mat)->name) { 1394 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1395 } 1396 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1397 } 1398 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1399 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1400 ierr = MatDestroy(&A);CHKERRQ(ierr); 1401 } 1402 PetscFunctionReturn(0); 1403 } 1404 1405 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1406 { 1407 PetscErrorCode ierr; 1408 PetscBool iascii,isdraw,issocket,isbinary; 1409 1410 PetscFunctionBegin; 1411 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1412 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1413 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1414 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1415 if (iascii || isdraw || isbinary || issocket) { 1416 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1417 } 1418 PetscFunctionReturn(0); 1419 } 1420 1421 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1422 { 1423 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1424 PetscErrorCode ierr; 1425 Vec bb1 = NULL; 1426 PetscBool hasop; 1427 1428 PetscFunctionBegin; 1429 if (flag == SOR_APPLY_UPPER) { 1430 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1431 PetscFunctionReturn(0); 1432 } 1433 1434 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1435 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1436 } 1437 1438 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1439 if (flag & SOR_ZERO_INITIAL_GUESS) { 1440 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1441 its--; 1442 } 1443 1444 while (its--) { 1445 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1446 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1447 1448 /* update rhs: bb1 = bb - B*x */ 1449 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1450 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1451 1452 /* local sweep */ 1453 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1454 } 1455 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1456 if (flag & SOR_ZERO_INITIAL_GUESS) { 1457 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1458 its--; 1459 } 1460 while (its--) { 1461 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1462 ierr = 
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1463 1464 /* update rhs: bb1 = bb - B*x */ 1465 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1466 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1467 1468 /* local sweep */ 1469 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1470 } 1471 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1472 if (flag & SOR_ZERO_INITIAL_GUESS) { 1473 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1474 its--; 1475 } 1476 while (its--) { 1477 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1478 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1479 1480 /* update rhs: bb1 = bb - B*x */ 1481 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1482 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1483 1484 /* local sweep */ 1485 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1486 } 1487 } else if (flag & SOR_EISENSTAT) { 1488 Vec xx1; 1489 1490 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1491 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1492 1493 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1494 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1495 if (!mat->diag) { 1496 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1497 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1498 } 1499 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1500 if (hasop) { 1501 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1502 } else { 1503 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1504 } 1505 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1506 1507 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1508 1509 /* local sweep */ 1510 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1511 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1512 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1513 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1514 1515 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1516 1517 matin->factorerrortype = mat->A->factorerrortype; 1518 PetscFunctionReturn(0); 1519 } 1520 1521 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1522 { 1523 Mat aA,aB,Aperm; 1524 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1525 PetscScalar *aa,*ba; 1526 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1527 PetscSF rowsf,sf; 1528 IS parcolp = NULL; 1529 PetscBool done; 1530 PetscErrorCode ierr; 1531 1532 PetscFunctionBegin; 1533 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1534 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1535 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1536 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1537 1538 /* Invert row permutation to find out where my rows should go */ 1539 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1540 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1541 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 
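  /* rowsf has one leaf per local row, rooted at the global row requested in rwant[]; reducing the owned
     global row numbers through it means that, after the reduce, rdest[i] names the row of the permuted
     matrix into which local row i is inserted below. The same star-forest construction is repeated for
     the owned columns (cdest[]) and, via a broadcast, for the ghost columns listed in garray (gcdest[]). */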
1542 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1543 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1544 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1545 1546 /* Invert column permutation to find out where my columns should go */ 1547 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1548 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1549 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1550 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1551 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1552 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1553 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1554 1555 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1556 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1557 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1558 1559 /* Find out where my gcols should go */ 1560 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1561 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1562 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1563 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1564 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1565 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr); 1566 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr); 1567 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1568 1569 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1570 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1571 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1572 for (i=0; i<m; i++) { 1573 PetscInt row = rdest[i]; 1574 PetscMPIInt rowner; 1575 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1576 for (j=ai[i]; j<ai[i+1]; j++) { 1577 PetscInt col = cdest[aj[j]]; 1578 PetscMPIInt cowner; 1579 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1580 if (rowner == cowner) dnnz[i]++; 1581 else onnz[i]++; 1582 } 1583 for (j=bi[i]; j<bi[i+1]; j++) { 1584 PetscInt col = gcdest[bj[j]]; 1585 PetscMPIInt cowner; 1586 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1587 if (rowner == cowner) dnnz[i]++; 1588 else onnz[i]++; 1589 } 1590 } 1591 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr); 1592 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr); 1593 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr); 1594 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr); 1595 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1596 1597 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1598 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1599 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1600 for (i=0; i<m; i++) { 1601 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1602 PetscInt j0,rowlen; 1603 rowlen = ai[i+1] - ai[i]; 1604 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1605 for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1606 ierr = 
MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1607 } 1608 rowlen = bi[i+1] - bi[i]; 1609 for (j0=j=0; j<rowlen; j0=j) { 1610 for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1611 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1612 } 1613 } 1614 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1615 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1616 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1617 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1618 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1619 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1620 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1621 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1622 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1623 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1624 *B = Aperm; 1625 PetscFunctionReturn(0); 1626 } 1627 1628 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1629 { 1630 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1631 PetscErrorCode ierr; 1632 1633 PetscFunctionBegin; 1634 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1635 if (ghosts) *ghosts = aij->garray; 1636 PetscFunctionReturn(0); 1637 } 1638 1639 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1640 { 1641 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1642 Mat A = mat->A,B = mat->B; 1643 PetscErrorCode ierr; 1644 PetscLogDouble isend[5],irecv[5]; 1645 1646 PetscFunctionBegin; 1647 info->block_size = 1.0; 1648 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1649 1650 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1651 isend[3] = info->memory; isend[4] = info->mallocs; 1652 1653 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1654 1655 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1656 isend[3] += info->memory; isend[4] += info->mallocs; 1657 if (flag == MAT_LOCAL) { 1658 info->nz_used = isend[0]; 1659 info->nz_allocated = isend[1]; 1660 info->nz_unneeded = isend[2]; 1661 info->memory = isend[3]; 1662 info->mallocs = isend[4]; 1663 } else if (flag == MAT_GLOBAL_MAX) { 1664 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr); 1665 1666 info->nz_used = irecv[0]; 1667 info->nz_allocated = irecv[1]; 1668 info->nz_unneeded = irecv[2]; 1669 info->memory = irecv[3]; 1670 info->mallocs = irecv[4]; 1671 } else if (flag == MAT_GLOBAL_SUM) { 1672 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr); 1673 1674 info->nz_used = irecv[0]; 1675 info->nz_allocated = irecv[1]; 1676 info->nz_unneeded = irecv[2]; 1677 info->memory = irecv[3]; 1678 info->mallocs = irecv[4]; 1679 } 1680 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1681 info->fill_ratio_needed = 0; 1682 info->factor_mallocs = 0; 1683 PetscFunctionReturn(0); 1684 } 1685 1686 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1687 { 1688 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1689 PetscErrorCode ierr; 1690 1691 PetscFunctionBegin; 1692 switch (op) { 1693 case MAT_NEW_NONZERO_LOCATIONS: 1694 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1695 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1696 case MAT_KEEP_NONZERO_PATTERN: 1697 case 
MAT_NEW_NONZERO_LOCATION_ERR: 1698 case MAT_USE_INODES: 1699 case MAT_IGNORE_ZERO_ENTRIES: 1700 case MAT_FORM_EXPLICIT_TRANSPOSE: 1701 MatCheckPreallocated(A,1); 1702 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1703 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1704 break; 1705 case MAT_ROW_ORIENTED: 1706 MatCheckPreallocated(A,1); 1707 a->roworiented = flg; 1708 1709 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1710 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1711 break; 1712 case MAT_FORCE_DIAGONAL_ENTRIES: 1713 case MAT_SORTED_FULL: 1714 ierr = PetscInfo(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1715 break; 1716 case MAT_IGNORE_OFF_PROC_ENTRIES: 1717 a->donotstash = flg; 1718 break; 1719 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1720 case MAT_SPD: 1721 case MAT_SYMMETRIC: 1722 case MAT_STRUCTURALLY_SYMMETRIC: 1723 case MAT_HERMITIAN: 1724 case MAT_SYMMETRY_ETERNAL: 1725 break; 1726 case MAT_SUBMAT_SINGLEIS: 1727 A->submat_singleis = flg; 1728 break; 1729 case MAT_STRUCTURE_ONLY: 1730 /* The option is handled directly by MatSetOption() */ 1731 break; 1732 default: 1733 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1734 } 1735 PetscFunctionReturn(0); 1736 } 1737 1738 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1739 { 1740 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1741 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1742 PetscErrorCode ierr; 1743 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1744 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1745 PetscInt *cmap,*idx_p; 1746 1747 PetscFunctionBegin; 1748 PetscCheckFalse(mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1749 mat->getrowactive = PETSC_TRUE; 1750 1751 if (!mat->rowvalues && (idx || v)) { 1752 /* 1753 allocate enough space to hold information from the longest row. 
1754 */ 1755 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1756 PetscInt max = 1,tmp; 1757 for (i=0; i<matin->rmap->n; i++) { 1758 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1759 if (max < tmp) max = tmp; 1760 } 1761 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1762 } 1763 1764 PetscCheckFalse(row < rstart || row >= rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1765 lrow = row - rstart; 1766 1767 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1768 if (!v) {pvA = NULL; pvB = NULL;} 1769 if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1770 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1771 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1772 nztot = nzA + nzB; 1773 1774 cmap = mat->garray; 1775 if (v || idx) { 1776 if (nztot) { 1777 /* Sort by increasing column numbers, assuming A and B already sorted */ 1778 PetscInt imark = -1; 1779 if (v) { 1780 *v = v_p = mat->rowvalues; 1781 for (i=0; i<nzB; i++) { 1782 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1783 else break; 1784 } 1785 imark = i; 1786 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1787 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1788 } 1789 if (idx) { 1790 *idx = idx_p = mat->rowindices; 1791 if (imark > -1) { 1792 for (i=0; i<imark; i++) { 1793 idx_p[i] = cmap[cworkB[i]]; 1794 } 1795 } else { 1796 for (i=0; i<nzB; i++) { 1797 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1798 else break; 1799 } 1800 imark = i; 1801 } 1802 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1803 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1804 } 1805 } else { 1806 if (idx) *idx = NULL; 1807 if (v) *v = NULL; 1808 } 1809 } 1810 *nz = nztot; 1811 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1812 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1813 PetscFunctionReturn(0); 1814 } 1815 1816 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1817 { 1818 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1819 1820 PetscFunctionBegin; 1821 PetscCheckFalse(!aij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1822 aij->getrowactive = PETSC_FALSE; 1823 PetscFunctionReturn(0); 1824 } 1825 1826 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1827 { 1828 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1829 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1830 PetscErrorCode ierr; 1831 PetscInt i,j,cstart = mat->cmap->rstart; 1832 PetscReal sum = 0.0; 1833 const MatScalar *v,*amata,*bmata; 1834 1835 PetscFunctionBegin; 1836 if (aij->size == 1) { 1837 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1838 } else { 1839 ierr = MatSeqAIJGetArrayRead(aij->A,&amata);CHKERRQ(ierr); 1840 ierr = MatSeqAIJGetArrayRead(aij->B,&bmata);CHKERRQ(ierr); 1841 if (type == NORM_FROBENIUS) { 1842 v = amata; 1843 for (i=0; i<amat->nz; i++) { 1844 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1845 } 1846 v = bmata; 1847 for (i=0; i<bmat->nz; i++) { 1848 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1849 } 1850 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1851 *norm = PetscSqrtReal(*norm); 1852 ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr); 1853 } else if (type == NORM_1) { /* max column norm */ 1854 PetscReal *tmp,*tmp2; 1855 
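  /* The 1-norm is the largest column sum of absolute values. tmp[] holds one partial sum per global
     column: entries of the diagonal block are accumulated at cstart + column, entries of the
     off-diagonal block at the global column given by garray. The partial sums are then summed across
     all ranks and the largest entry is the norm. */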
PetscInt *jj,*garray = aij->garray; 1856 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1857 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1858 *norm = 0.0; 1859 v = amata; jj = amat->j; 1860 for (j=0; j<amat->nz; j++) { 1861 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1862 } 1863 v = bmata; jj = bmat->j; 1864 for (j=0; j<bmat->nz; j++) { 1865 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1866 } 1867 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1868 for (j=0; j<mat->cmap->N; j++) { 1869 if (tmp2[j] > *norm) *norm = tmp2[j]; 1870 } 1871 ierr = PetscFree(tmp);CHKERRQ(ierr); 1872 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1873 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1874 } else if (type == NORM_INFINITY) { /* max row norm */ 1875 PetscReal ntemp = 0.0; 1876 for (j=0; j<aij->A->rmap->n; j++) { 1877 v = amata + amat->i[j]; 1878 sum = 0.0; 1879 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1880 sum += PetscAbsScalar(*v); v++; 1881 } 1882 v = bmata + bmat->i[j]; 1883 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1884 sum += PetscAbsScalar(*v); v++; 1885 } 1886 if (sum > ntemp) ntemp = sum; 1887 } 1888 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1889 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1890 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1891 ierr = MatSeqAIJRestoreArrayRead(aij->A,&amata);CHKERRQ(ierr); 1892 ierr = MatSeqAIJRestoreArrayRead(aij->B,&bmata);CHKERRQ(ierr); 1893 } 1894 PetscFunctionReturn(0); 1895 } 1896 1897 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1898 { 1899 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 1900 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 1901 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 1902 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 1903 PetscErrorCode ierr; 1904 Mat B,A_diag,*B_diag; 1905 const MatScalar *pbv,*bv; 1906 1907 PetscFunctionBegin; 1908 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1909 ai = Aloc->i; aj = Aloc->j; 1910 bi = Bloc->i; bj = Bloc->j; 1911 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1912 PetscInt *d_nnz,*g_nnz,*o_nnz; 1913 PetscSFNode *oloc; 1914 PETSC_UNUSED PetscSF sf; 1915 1916 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1917 /* compute d_nnz for preallocation */ 1918 ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr); 1919 for (i=0; i<ai[ma]; i++) { 1920 d_nnz[aj[i]]++; 1921 } 1922 /* compute local off-diagonal contributions */ 1923 ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr); 1924 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1925 /* map those to global */ 1926 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1927 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1928 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1929 ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr); 1930 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1931 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1932 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1933 1934 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1935 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1936 ierr = 
MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1937 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1938 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1939 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1940 } else { 1941 B = *matout; 1942 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1943 } 1944 1945 b = (Mat_MPIAIJ*)B->data; 1946 A_diag = a->A; 1947 B_diag = &b->A; 1948 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 1949 A_diag_ncol = A_diag->cmap->N; 1950 B_diag_ilen = sub_B_diag->ilen; 1951 B_diag_i = sub_B_diag->i; 1952 1953 /* Set ilen for diagonal of B */ 1954 for (i=0; i<A_diag_ncol; i++) { 1955 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 1956 } 1957 1958 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 1959 very quickly (=without using MatSetValues), because all writes are local. */ 1960 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 1961 1962 /* copy over the B part */ 1963 ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr); 1964 ierr = MatSeqAIJGetArrayRead(a->B,&bv);CHKERRQ(ierr); 1965 pbv = bv; 1966 row = A->rmap->rstart; 1967 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1968 cols_tmp = cols; 1969 for (i=0; i<mb; i++) { 1970 ncol = bi[i+1]-bi[i]; 1971 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES);CHKERRQ(ierr); 1972 row++; 1973 pbv += ncol; cols_tmp += ncol; 1974 } 1975 ierr = PetscFree(cols);CHKERRQ(ierr); 1976 ierr = MatSeqAIJRestoreArrayRead(a->B,&bv);CHKERRQ(ierr); 1977 1978 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1979 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1980 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1981 *matout = B; 1982 } else { 1983 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 1984 } 1985 PetscFunctionReturn(0); 1986 } 1987 1988 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1989 { 1990 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1991 Mat a = aij->A,b = aij->B; 1992 PetscErrorCode ierr; 1993 PetscInt s1,s2,s3; 1994 1995 PetscFunctionBegin; 1996 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1997 if (rr) { 1998 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1999 PetscCheckFalse(s1!=s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2000 /* Overlap communication with computation. 
*/ 2001 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2002 } 2003 if (ll) { 2004 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2005 PetscCheckFalse(s1!=s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2006 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 2007 } 2008 /* scale the diagonal block */ 2009 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2010 2011 if (rr) { 2012 /* Do a scatter end and then right scale the off-diagonal block */ 2013 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2014 ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr); 2015 } 2016 PetscFunctionReturn(0); 2017 } 2018 2019 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2020 { 2021 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2022 PetscErrorCode ierr; 2023 2024 PetscFunctionBegin; 2025 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2026 PetscFunctionReturn(0); 2027 } 2028 2029 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2030 { 2031 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2032 Mat a,b,c,d; 2033 PetscBool flg; 2034 PetscErrorCode ierr; 2035 2036 PetscFunctionBegin; 2037 a = matA->A; b = matA->B; 2038 c = matB->A; d = matB->B; 2039 2040 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2041 if (flg) { 2042 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2043 } 2044 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 2045 PetscFunctionReturn(0); 2046 } 2047 2048 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2049 { 2050 PetscErrorCode ierr; 2051 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2052 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2053 2054 PetscFunctionBegin; 2055 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2056 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2057 /* because of the column compression in the off-processor part of the matrix a->B, 2058 the number of columns in a->B and b->B may be different, hence we cannot call 2059 the MatCopy() directly on the two parts. If need be, we can provide a more 2060 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2061 then copying the submatrices */ 2062 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2063 } else { 2064 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2065 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2066 } 2067 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2068 PetscFunctionReturn(0); 2069 } 2070 2071 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2072 { 2073 PetscErrorCode ierr; 2074 2075 PetscFunctionBegin; 2076 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr); 2077 PetscFunctionReturn(0); 2078 } 2079 2080 /* 2081 Computes the number of nonzeros per row needed for preallocation when X and Y 2082 have different nonzero structure. 
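   The counts are obtained by merging, for each row, the two sorted column lists of X and Y (translated
   to global indices through xltog and yltog) and counting each distinct column once, as in the merge
   step of a merge sort.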
2083 */ 2084 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2085 { 2086 PetscInt i,j,k,nzx,nzy; 2087 2088 PetscFunctionBegin; 2089 /* Set the number of nonzeros in the new matrix */ 2090 for (i=0; i<m; i++) { 2091 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2092 nzx = xi[i+1] - xi[i]; 2093 nzy = yi[i+1] - yi[i]; 2094 nnz[i] = 0; 2095 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2096 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2097 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2098 nnz[i]++; 2099 } 2100 for (; k<nzy; k++) nnz[i]++; 2101 } 2102 PetscFunctionReturn(0); 2103 } 2104 2105 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2106 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2107 { 2108 PetscErrorCode ierr; 2109 PetscInt m = Y->rmap->N; 2110 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2111 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2112 2113 PetscFunctionBegin; 2114 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2115 PetscFunctionReturn(0); 2116 } 2117 2118 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2119 { 2120 PetscErrorCode ierr; 2121 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2122 2123 PetscFunctionBegin; 2124 if (str == SAME_NONZERO_PATTERN) { 2125 ierr = MatAXPY(yy->A,a,xx->A,str);CHKERRQ(ierr); 2126 ierr = MatAXPY(yy->B,a,xx->B,str);CHKERRQ(ierr); 2127 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2128 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2129 } else { 2130 Mat B; 2131 PetscInt *nnz_d,*nnz_o; 2132 2133 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2134 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2135 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2136 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2137 ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr); 2138 ierr = MatSetType(B,((PetscObject)Y)->type_name);CHKERRQ(ierr); 2139 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2140 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2141 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2142 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2143 ierr = MatHeaderMerge(Y,&B);CHKERRQ(ierr); 2144 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2145 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2146 } 2147 PetscFunctionReturn(0); 2148 } 2149 2150 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2151 2152 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2153 { 2154 #if defined(PETSC_USE_COMPLEX) 2155 PetscErrorCode ierr; 2156 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2157 2158 PetscFunctionBegin; 2159 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2160 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2161 #else 2162 PetscFunctionBegin; 2163 #endif 2164 PetscFunctionReturn(0); 2165 } 2166 2167 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2168 { 2169 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2170 PetscErrorCode ierr; 2171 2172 PetscFunctionBegin; 2173 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2174 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2175 
PetscFunctionReturn(0); 2176 } 2177 2178 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2179 { 2180 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2181 PetscErrorCode ierr; 2182 2183 PetscFunctionBegin; 2184 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2185 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2186 PetscFunctionReturn(0); 2187 } 2188 2189 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2190 { 2191 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2192 PetscErrorCode ierr; 2193 PetscInt i,*idxb = NULL,m = A->rmap->n; 2194 PetscScalar *va,*vv; 2195 Vec vB,vA; 2196 const PetscScalar *vb; 2197 2198 PetscFunctionBegin; 2199 ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr); 2200 ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr); 2201 2202 ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr); 2203 if (idx) { 2204 for (i=0; i<m; i++) { 2205 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2206 } 2207 } 2208 2209 ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr); 2210 ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr); 2211 ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr); 2212 2213 ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr); 2214 ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr); 2215 for (i=0; i<m; i++) { 2216 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2217 vv[i] = vb[i]; 2218 if (idx) idx[i] = a->garray[idxb[i]]; 2219 } else { 2220 vv[i] = va[i]; 2221 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) 2222 idx[i] = a->garray[idxb[i]]; 2223 } 2224 } 2225 ierr = VecRestoreArrayWrite(v,&vv);CHKERRQ(ierr); 2226 ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr); 2227 ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr); 2228 ierr = PetscFree(idxb);CHKERRQ(ierr); 2229 ierr = VecDestroy(&vA);CHKERRQ(ierr); 2230 ierr = VecDestroy(&vB);CHKERRQ(ierr); 2231 PetscFunctionReturn(0); 2232 } 2233 2234 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2235 { 2236 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2237 PetscInt m = A->rmap->n,n = A->cmap->n; 2238 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2239 PetscInt *cmap = mat->garray; 2240 PetscInt *diagIdx, *offdiagIdx; 2241 Vec diagV, offdiagV; 2242 PetscScalar *a, *diagA, *offdiagA; 2243 const PetscScalar *ba,*bav; 2244 PetscInt r,j,col,ncols,*bi,*bj; 2245 PetscErrorCode ierr; 2246 Mat B = mat->B; 2247 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2248 2249 PetscFunctionBegin; 2250 /* When one process holds the entire A and the other processes have no entries */ 2251 if (A->cmap->N == n) { 2252 ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2253 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2254 ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr); 2255 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2256 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2257 PetscFunctionReturn(0); 2258 } else if (n == 0) { 2259 if (m) { 2260 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2261 for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;} 2262 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2263 } 2264 PetscFunctionReturn(0); 2265 } 2266 2267 ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2268 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2269 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2270 ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2271 2272 /* Get offdiagIdx[] for implicit 0.0 */ 2273 ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr); 2274 ba = bav; 2275 bi = b->i; 2276 bj = b->j;
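  /* The off-diagonal block B stores only the columns that contain at least one nonzero on this process
     (compressed through cmap/garray), so a row that does not touch every such column has an implicit
     zero outside the diagonal block. The loop below records the global column of the first implicit
     zero (the first "hole" in cmap) as the candidate minimum of magnitude 0.0, then scans the stored
     entries and keeps any entry of smaller magnitude; rows that are dense in B start instead from
     their first stored entry. */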
2277 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2278 for (r = 0; r < m; r++) { 2279 ncols = bi[r+1] - bi[r]; 2280 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2281 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2282 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2283 offdiagA[r] = 0.0; 2284 2285 /* Find first hole in the cmap */ 2286 for (j=0; j<ncols; j++) { 2287 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2288 if (col > j && j < cstart) { 2289 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2290 break; 2291 } else if (col > j + n && j >= cstart) { 2292 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2293 break; 2294 } 2295 } 2296 if (j == ncols && ncols < A->cmap->N - n) { 2297 /* a hole is outside compressed Bcols */ 2298 if (ncols == 0) { 2299 if (cstart) { 2300 offdiagIdx[r] = 0; 2301 } else offdiagIdx[r] = cend; 2302 } else { /* ncols > 0 */ 2303 offdiagIdx[r] = cmap[ncols-1] + 1; 2304 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2305 } 2306 } 2307 } 2308 2309 for (j=0; j<ncols; j++) { 2310 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2311 ba++; bj++; 2312 } 2313 } 2314 2315 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2316 ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2317 for (r = 0; r < m; ++r) { 2318 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2319 a[r] = diagA[r]; 2320 if (idx) idx[r] = cstart + diagIdx[r]; 2321 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2322 a[r] = diagA[r]; 2323 if (idx) { 2324 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2325 idx[r] = cstart + diagIdx[r]; 2326 } else idx[r] = offdiagIdx[r]; 2327 } 2328 } else { 2329 a[r] = offdiagA[r]; 2330 if (idx) idx[r] = offdiagIdx[r]; 2331 } 2332 } 2333 ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr); 2334 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2335 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2336 ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2337 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2338 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2339 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2340 PetscFunctionReturn(0); 2341 } 2342 2343 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2344 { 2345 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2346 PetscInt m = A->rmap->n,n = A->cmap->n; 2347 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2348 PetscInt *cmap = mat->garray; 2349 PetscInt *diagIdx, *offdiagIdx; 2350 Vec diagV, offdiagV; 2351 PetscScalar *a, *diagA, *offdiagA; 2352 const PetscScalar *ba,*bav; 2353 PetscInt r,j,col,ncols,*bi,*bj; 2354 PetscErrorCode ierr; 2355 Mat B = mat->B; 2356 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2357 2358 PetscFunctionBegin; 2359 /* When a process holds entire A and other processes have no entry */ 2360 if (A->cmap->N == n) { 2361 ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2362 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2363 ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr); 2364 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2365 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2366 PetscFunctionReturn(0); 2367 } else if (n == 0) { 2368 if (m) { 2369 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2370 for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;} 2371 ierr = 
VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2372 } 2373 PetscFunctionReturn(0); 2374 } 2375 2376 ierr = PetscCalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2377 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2378 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2379 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2380 2381 /* Get offdiagIdx[] for implicit 0.0 */ 2382 ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr); 2383 ba = bav; 2384 bi = b->i; 2385 bj = b->j; 2386 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2387 for (r = 0; r < m; r++) { 2388 ncols = bi[r+1] - bi[r]; 2389 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2390 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2391 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2392 offdiagA[r] = 0.0; 2393 2394 /* Find first hole in the cmap */ 2395 for (j=0; j<ncols; j++) { 2396 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2397 if (col > j && j < cstart) { 2398 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2399 break; 2400 } else if (col > j + n && j >= cstart) { 2401 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2402 break; 2403 } 2404 } 2405 if (j == ncols && ncols < A->cmap->N - n) { 2406 /* a hole is outside compressed Bcols */ 2407 if (ncols == 0) { 2408 if (cstart) { 2409 offdiagIdx[r] = 0; 2410 } else offdiagIdx[r] = cend; 2411 } else { /* ncols > 0 */ 2412 offdiagIdx[r] = cmap[ncols-1] + 1; 2413 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2414 } 2415 } 2416 } 2417 2418 for (j=0; j<ncols; j++) { 2419 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2420 ba++; bj++; 2421 } 2422 } 2423 2424 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2425 ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2426 for (r = 0; r < m; ++r) { 2427 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2428 a[r] = diagA[r]; 2429 if (idx) idx[r] = cstart + diagIdx[r]; 2430 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2431 a[r] = diagA[r]; 2432 if (idx) { 2433 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2434 idx[r] = cstart + diagIdx[r]; 2435 } else idx[r] = offdiagIdx[r]; 2436 } 2437 } else { 2438 a[r] = offdiagA[r]; 2439 if (idx) idx[r] = offdiagIdx[r]; 2440 } 2441 } 2442 ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr); 2443 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2444 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2445 ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2446 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2447 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2448 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2449 PetscFunctionReturn(0); 2450 } 2451 2452 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2453 { 2454 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 2455 PetscInt m = A->rmap->n,n = A->cmap->n; 2456 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2457 PetscInt *cmap = mat->garray; 2458 PetscInt *diagIdx, *offdiagIdx; 2459 Vec diagV, offdiagV; 2460 PetscScalar *a, *diagA, *offdiagA; 2461 const PetscScalar *ba,*bav; 2462 PetscInt r,j,col,ncols,*bi,*bj; 2463 PetscErrorCode ierr; 2464 Mat B = mat->B; 2465 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2466 2467 PetscFunctionBegin; 2468 /* When a process holds entire A and other processes have no entry */ 2469 if (A->cmap->N == n) { 2470 ierr = 
VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2471 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2472 ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr); 2473 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2474 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2475 PetscFunctionReturn(0); 2476 } else if (n == 0) { 2477 if (m) { 2478 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2479 for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;} 2480 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2481 } 2482 PetscFunctionReturn(0); 2483 } 2484 2485 ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2486 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2487 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2488 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2489 2490 /* Get offdiagIdx[] for implicit 0.0 */ 2491 ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr); 2492 ba = bav; 2493 bi = b->i; 2494 bj = b->j; 2495 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2496 for (r = 0; r < m; r++) { 2497 ncols = bi[r+1] - bi[r]; 2498 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2499 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2500 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2501 offdiagA[r] = 0.0; 2502 2503 /* Find first hole in the cmap */ 2504 for (j=0; j<ncols; j++) { 2505 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2506 if (col > j && j < cstart) { 2507 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2508 break; 2509 } else if (col > j + n && j >= cstart) { 2510 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2511 break; 2512 } 2513 } 2514 if (j == ncols && ncols < A->cmap->N - n) { 2515 /* a hole is outside compressed Bcols */ 2516 if (ncols == 0) { 2517 if (cstart) { 2518 offdiagIdx[r] = 0; 2519 } else offdiagIdx[r] = cend; 2520 } else { /* ncols > 0 */ 2521 offdiagIdx[r] = cmap[ncols-1] + 1; 2522 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2523 } 2524 } 2525 } 2526 2527 for (j=0; j<ncols; j++) { 2528 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2529 ba++; bj++; 2530 } 2531 } 2532 2533 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2534 ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr); 2535 for (r = 0; r < m; ++r) { 2536 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2537 a[r] = diagA[r]; 2538 if (idx) idx[r] = cstart + diagIdx[r]; 2539 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2540 a[r] = diagA[r]; 2541 if (idx) { 2542 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2543 idx[r] = cstart + diagIdx[r]; 2544 } else idx[r] = offdiagIdx[r]; 2545 } 2546 } else { 2547 a[r] = offdiagA[r]; 2548 if (idx) idx[r] = offdiagIdx[r]; 2549 } 2550 } 2551 ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr); 2552 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2553 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2554 ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr); 2555 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2556 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2557 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2558 PetscFunctionReturn(0); 2559 } 2560 2561 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2562 { 2563 PetscErrorCode ierr; 2564 Mat *dummy; 2565 2566 PetscFunctionBegin; 2567 ierr = 
MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2568 *newmat = *dummy; 2569 ierr = PetscFree(dummy);CHKERRQ(ierr); 2570 PetscFunctionReturn(0); 2571 } 2572 2573 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2574 { 2575 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2576 PetscErrorCode ierr; 2577 2578 PetscFunctionBegin; 2579 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2580 A->factorerrortype = a->A->factorerrortype; 2581 PetscFunctionReturn(0); 2582 } 2583 2584 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2585 { 2586 PetscErrorCode ierr; 2587 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2588 2589 PetscFunctionBegin; 2590 PetscCheckFalse(!x->assembled && !x->preallocated,PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2591 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2592 if (x->assembled) { 2593 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2594 } else { 2595 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2596 } 2597 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2598 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2599 PetscFunctionReturn(0); 2600 } 2601 2602 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2603 { 2604 PetscFunctionBegin; 2605 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2606 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2607 PetscFunctionReturn(0); 2608 } 2609 2610 /*@ 2611 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2612 2613 Collective on Mat 2614 2615 Input Parameters: 2616 + A - the matrix 2617 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2618 2619 Level: advanced 2620 2621 @*/ 2622 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2623 { 2624 PetscErrorCode ierr; 2625 2626 PetscFunctionBegin; 2627 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2628 PetscFunctionReturn(0); 2629 } 2630 2631 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2632 { 2633 PetscErrorCode ierr; 2634 PetscBool sc = PETSC_FALSE,flg; 2635 2636 PetscFunctionBegin; 2637 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2638 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2639 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2640 if (flg) { 2641 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2642 } 2643 ierr = PetscOptionsTail();CHKERRQ(ierr); 2644 PetscFunctionReturn(0); 2645 } 2646 2647 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2648 { 2649 PetscErrorCode ierr; 2650 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2651 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2652 2653 PetscFunctionBegin; 2654 if (!Y->preallocated) { 2655 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2656 } else if (!aij->nz) { 2657 PetscInt nonew = aij->nonew; 2658 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2659 aij->nonew = nonew; 2660 } 2661 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2662 
PetscFunctionReturn(0); 2663 } 2664 2665 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2666 { 2667 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2668 PetscErrorCode ierr; 2669 2670 PetscFunctionBegin; 2671 PetscCheckFalse(A->rmap->n != A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2672 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2673 if (d) { 2674 PetscInt rstart; 2675 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2676 *d += rstart; 2677 2678 } 2679 PetscFunctionReturn(0); 2680 } 2681 2682 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2683 { 2684 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2685 PetscErrorCode ierr; 2686 2687 PetscFunctionBegin; 2688 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2689 PetscFunctionReturn(0); 2690 } 2691 2692 /* -------------------------------------------------------------------*/ 2693 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2694 MatGetRow_MPIAIJ, 2695 MatRestoreRow_MPIAIJ, 2696 MatMult_MPIAIJ, 2697 /* 4*/ MatMultAdd_MPIAIJ, 2698 MatMultTranspose_MPIAIJ, 2699 MatMultTransposeAdd_MPIAIJ, 2700 NULL, 2701 NULL, 2702 NULL, 2703 /*10*/ NULL, 2704 NULL, 2705 NULL, 2706 MatSOR_MPIAIJ, 2707 MatTranspose_MPIAIJ, 2708 /*15*/ MatGetInfo_MPIAIJ, 2709 MatEqual_MPIAIJ, 2710 MatGetDiagonal_MPIAIJ, 2711 MatDiagonalScale_MPIAIJ, 2712 MatNorm_MPIAIJ, 2713 /*20*/ MatAssemblyBegin_MPIAIJ, 2714 MatAssemblyEnd_MPIAIJ, 2715 MatSetOption_MPIAIJ, 2716 MatZeroEntries_MPIAIJ, 2717 /*24*/ MatZeroRows_MPIAIJ, 2718 NULL, 2719 NULL, 2720 NULL, 2721 NULL, 2722 /*29*/ MatSetUp_MPIAIJ, 2723 NULL, 2724 NULL, 2725 MatGetDiagonalBlock_MPIAIJ, 2726 NULL, 2727 /*34*/ MatDuplicate_MPIAIJ, 2728 NULL, 2729 NULL, 2730 NULL, 2731 NULL, 2732 /*39*/ MatAXPY_MPIAIJ, 2733 MatCreateSubMatrices_MPIAIJ, 2734 MatIncreaseOverlap_MPIAIJ, 2735 MatGetValues_MPIAIJ, 2736 MatCopy_MPIAIJ, 2737 /*44*/ MatGetRowMax_MPIAIJ, 2738 MatScale_MPIAIJ, 2739 MatShift_MPIAIJ, 2740 MatDiagonalSet_MPIAIJ, 2741 MatZeroRowsColumns_MPIAIJ, 2742 /*49*/ MatSetRandom_MPIAIJ, 2743 NULL, 2744 NULL, 2745 NULL, 2746 NULL, 2747 /*54*/ MatFDColoringCreate_MPIXAIJ, 2748 NULL, 2749 MatSetUnfactored_MPIAIJ, 2750 MatPermute_MPIAIJ, 2751 NULL, 2752 /*59*/ MatCreateSubMatrix_MPIAIJ, 2753 MatDestroy_MPIAIJ, 2754 MatView_MPIAIJ, 2755 NULL, 2756 NULL, 2757 /*64*/ NULL, 2758 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2759 NULL, 2760 NULL, 2761 NULL, 2762 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2763 MatGetRowMinAbs_MPIAIJ, 2764 NULL, 2765 NULL, 2766 NULL, 2767 NULL, 2768 /*75*/ MatFDColoringApply_AIJ, 2769 MatSetFromOptions_MPIAIJ, 2770 NULL, 2771 NULL, 2772 MatFindZeroDiagonals_MPIAIJ, 2773 /*80*/ NULL, 2774 NULL, 2775 NULL, 2776 /*83*/ MatLoad_MPIAIJ, 2777 MatIsSymmetric_MPIAIJ, 2778 NULL, 2779 NULL, 2780 NULL, 2781 NULL, 2782 /*89*/ NULL, 2783 NULL, 2784 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2785 NULL, 2786 NULL, 2787 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2788 NULL, 2789 NULL, 2790 NULL, 2791 MatBindToCPU_MPIAIJ, 2792 /*99*/ MatProductSetFromOptions_MPIAIJ, 2793 NULL, 2794 NULL, 2795 MatConjugate_MPIAIJ, 2796 NULL, 2797 /*104*/MatSetValuesRow_MPIAIJ, 2798 MatRealPart_MPIAIJ, 2799 MatImaginaryPart_MPIAIJ, 2800 NULL, 2801 NULL, 2802 /*109*/NULL, 2803 NULL, 2804 MatGetRowMin_MPIAIJ, 2805 NULL, 2806 MatMissingDiagonal_MPIAIJ, 2807 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2808 NULL, 2809 MatGetGhosts_MPIAIJ, 2810 NULL, 2811 NULL, 2812 /*119*/MatMultDiagonalBlock_MPIAIJ, 
2813 NULL, 2814 NULL, 2815 NULL, 2816 MatGetMultiProcBlock_MPIAIJ, 2817 /*124*/MatFindNonzeroRows_MPIAIJ, 2818 MatGetColumnReductions_MPIAIJ, 2819 MatInvertBlockDiagonal_MPIAIJ, 2820 MatInvertVariableBlockDiagonal_MPIAIJ, 2821 MatCreateSubMatricesMPI_MPIAIJ, 2822 /*129*/NULL, 2823 NULL, 2824 NULL, 2825 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2826 NULL, 2827 /*134*/NULL, 2828 NULL, 2829 NULL, 2830 NULL, 2831 NULL, 2832 /*139*/MatSetBlockSizes_MPIAIJ, 2833 NULL, 2834 NULL, 2835 MatFDColoringSetUp_MPIXAIJ, 2836 MatFindOffBlockDiagonalEntries_MPIAIJ, 2837 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2838 /*145*/NULL, 2839 NULL, 2840 NULL 2841 }; 2842 2843 /* ----------------------------------------------------------------------------------------*/ 2844 2845 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2846 { 2847 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2848 PetscErrorCode ierr; 2849 2850 PetscFunctionBegin; 2851 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2852 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2853 PetscFunctionReturn(0); 2854 } 2855 2856 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2857 { 2858 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2859 PetscErrorCode ierr; 2860 2861 PetscFunctionBegin; 2862 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2863 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2864 PetscFunctionReturn(0); 2865 } 2866 2867 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2868 { 2869 Mat_MPIAIJ *b; 2870 PetscErrorCode ierr; 2871 PetscMPIInt size; 2872 2873 PetscFunctionBegin; 2874 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2875 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2876 b = (Mat_MPIAIJ*)B->data; 2877 2878 #if defined(PETSC_USE_CTABLE) 2879 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2880 #else 2881 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2882 #endif 2883 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2884 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2885 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2886 2887 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2888 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr); 2889 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2890 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2891 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0);CHKERRQ(ierr); 2892 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2893 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2894 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2895 2896 if (!B->preallocated) { 2897 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2898 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2899 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2900 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2901 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2902 } 2903 2904 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2905 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2906 B->preallocated = PETSC_TRUE; 2907 B->was_assembled = PETSC_FALSE; 2908 B->assembled = PETSC_FALSE; 2909 PetscFunctionReturn(0); 2910 } 2911 2912 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2913 { 2914 Mat_MPIAIJ *b; 2915 PetscErrorCode ierr; 2916 2917 PetscFunctionBegin; 2918 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2919 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2920 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2921 b = (Mat_MPIAIJ*)B->data; 2922 2923 #if defined(PETSC_USE_CTABLE) 2924 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2925 #else 2926 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2927 #endif 2928 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2929 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2930 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2931 2932 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2933 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2934 B->preallocated = PETSC_TRUE; 2935 B->was_assembled = PETSC_FALSE; 2936 B->assembled = PETSC_FALSE; 2937 PetscFunctionReturn(0); 2938 } 2939 2940 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2941 { 2942 Mat mat; 2943 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2944 PetscErrorCode ierr; 2945 2946 PetscFunctionBegin; 2947 *newmat = NULL; 2948 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2949 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2950 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2951 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2952 a = (Mat_MPIAIJ*)mat->data; 2953 2954 mat->factortype = matin->factortype; 2955 mat->assembled = matin->assembled; 2956 mat->insertmode = NOT_SET_VALUES; 2957 mat->preallocated = matin->preallocated; 2958 2959 a->size = oldmat->size; 2960 a->rank = oldmat->rank; 2961 a->donotstash = oldmat->donotstash; 2962 a->roworiented = oldmat->roworiented; 2963 a->rowindices = NULL; 2964 a->rowvalues = NULL; 2965 a->getrowactive = PETSC_FALSE; 2966 2967 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2968 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2969 2970 if (oldmat->colmap) { 2971 #if defined(PETSC_USE_CTABLE) 2972 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2973 #else 2974 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2975 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2976 ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr); 2977 #endif 2978 } else a->colmap = NULL; 2979 if (oldmat->garray) { 2980 PetscInt len; 2981 len = oldmat->B->cmap->n; 2982 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2983 
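  /* garray translates the compressed local column indices of the off-diagonal block B into global
     column numbers; copying it directly lets the duplicate keep the same compressed layout without
     rebuilding the map. */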
ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2984 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); } 2985 } else a->garray = NULL; 2986 2987 /* It may happen MatDuplicate is called with a non-assembled matrix 2988 In fact, MatDuplicate only requires the matrix to be preallocated 2989 This may happen inside a DMCreateMatrix_Shell */ 2990 if (oldmat->lvec) { 2991 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2992 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2993 } 2994 if (oldmat->Mvctx) { 2995 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2996 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2997 } 2998 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2999 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3000 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3001 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3002 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3003 *newmat = mat; 3004 PetscFunctionReturn(0); 3005 } 3006 3007 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3008 { 3009 PetscBool isbinary, ishdf5; 3010 PetscErrorCode ierr; 3011 3012 PetscFunctionBegin; 3013 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 3014 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 3015 /* force binary viewer to load .info file if it has not yet done so */ 3016 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3017 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 3018 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 3019 if (isbinary) { 3020 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 3021 } else if (ishdf5) { 3022 #if defined(PETSC_HAVE_HDF5) 3023 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 3024 #else 3025 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3026 #endif 3027 } else { 3028 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 3029 } 3030 PetscFunctionReturn(0); 3031 } 3032 3033 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3034 { 3035 PetscInt header[4],M,N,m,nz,rows,cols,sum,i; 3036 PetscInt *rowidxs,*colidxs; 3037 PetscScalar *matvals; 3038 PetscErrorCode ierr; 3039 3040 PetscFunctionBegin; 3041 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3042 3043 /* read in matrix header */ 3044 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3045 PetscCheckFalse(header[0] != MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3046 M = header[1]; N = header[2]; nz = header[3]; 3047 PetscCheckFalse(M < 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M); 3048 PetscCheckFalse(N < 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N); 3049 PetscCheckFalse(nz < 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 3050 3051 /* set block sizes from 
the viewer's .info file */ 3052 ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 3053 /* set global sizes if not set already */ 3054 if (mat->rmap->N < 0) mat->rmap->N = M; 3055 if (mat->cmap->N < 0) mat->cmap->N = N; 3056 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 3057 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 3058 3059 /* check if the matrix sizes are correct */ 3060 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3061 PetscCheckFalse(M != rows || N != cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols); 3062 3063 /* read in row lengths and build row indices */ 3064 ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr); 3065 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 3066 ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr); 3067 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3068 ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRMPI(ierr); 3069 PetscCheckFalse(sum != nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum); 3070 /* read in column indices and matrix values */ 3071 ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr); 3072 ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 3073 ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 3074 /* store matrix indices and values */ 3075 ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr); 3076 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 3077 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 3078 PetscFunctionReturn(0); 3079 } 3080 3081 /* Not scalable because of ISAllGather() unless getting all columns. 
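   ISAllGather() leaves every process holding its own sequential copy of the entire column index set,
   so the per-process memory grows with the global size of iscol.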
*/ 3082 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3083 { 3084 PetscErrorCode ierr; 3085 IS iscol_local; 3086 PetscBool isstride; 3087 PetscMPIInt lisstride=0,gisstride; 3088 3089 PetscFunctionBegin; 3090 /* check if we are grabbing all columns*/ 3091 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3092 3093 if (isstride) { 3094 PetscInt start,len,mstart,mlen; 3095 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3096 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3097 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3098 if (mstart == start && mlen-mstart == len) lisstride = 1; 3099 } 3100 3101 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 3102 if (gisstride) { 3103 PetscInt N; 3104 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3105 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr); 3106 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3107 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3108 } else { 3109 PetscInt cbs; 3110 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3111 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3112 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3113 } 3114 3115 *isseq = iscol_local; 3116 PetscFunctionReturn(0); 3117 } 3118 3119 /* 3120 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3121 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3122 3123 Input Parameters: 3124 mat - matrix 3125 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3126 i.e., mat->rstart <= isrow[i] < mat->rend 3127 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3128 i.e., mat->cstart <= iscol[i] < mat->cend 3129 Output Parameter: 3130 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3131 iscol_o - sequential column index set for retrieving mat->B 3132 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3133 */ 3134 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3135 { 3136 PetscErrorCode ierr; 3137 Vec x,cmap; 3138 const PetscInt *is_idx; 3139 PetscScalar *xarray,*cmaparray; 3140 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3141 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3142 Mat B=a->B; 3143 Vec lvec=a->lvec,lcmap; 3144 PetscInt i,cstart,cend,Bn=B->cmap->N; 3145 MPI_Comm comm; 3146 VecScatter Mvctx=a->Mvctx; 3147 3148 PetscFunctionBegin; 3149 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3150 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3151 3152 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3153 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3154 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3155 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3156 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3157 3158 /* Get start indices */ 3159 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3160 isstart -= ncols; 3161 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3162 3163 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3164 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3165 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3166 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3167 for (i=0; i<ncols; i++) { 3168 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3169 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3170 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3171 } 3172 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3173 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3174 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3175 3176 /* Get iscol_d */ 3177 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3178 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3179 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3180 3181 /* Get isrow_d */ 3182 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3183 rstart = mat->rmap->rstart; 3184 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3185 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3186 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3187 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3188 3189 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3190 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3191 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3192 3193 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3194 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3195 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3196 3197 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3198 3199 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3200 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3201 3202 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3203 /* off-process column indices */ 3204 count = 0; 3205 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3206 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3207 3208 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3209 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3210 for (i=0; i<Bn; i++) { 3211 if (PetscRealPart(xarray[i]) > -1.0) { 3212 idx[count] = i; /* local column index in off-diagonal part B */ 3213 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3214 count++; 3215 } 3216 } 3217 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3218 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3219 3220 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3221 /* cannot ensure iscol_o has same blocksize as iscol! 
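   The off-process columns collected above are whatever subset happens to be selected, so they need
   not form complete blocks; unlike iscol_d, no block size is attached to iscol_o.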
*/ 3222 3223 ierr = PetscFree(idx);CHKERRQ(ierr); 3224 *garray = cmap1; 3225 3226 ierr = VecDestroy(&x);CHKERRQ(ierr); 3227 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3228 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3229 PetscFunctionReturn(0); 3230 } 3231 3232 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3233 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3234 { 3235 PetscErrorCode ierr; 3236 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3237 Mat M = NULL; 3238 MPI_Comm comm; 3239 IS iscol_d,isrow_d,iscol_o; 3240 Mat Asub = NULL,Bsub = NULL; 3241 PetscInt n; 3242 3243 PetscFunctionBegin; 3244 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3245 3246 if (call == MAT_REUSE_MATRIX) { 3247 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3248 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3249 PetscCheckFalse(!isrow_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3250 3251 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3252 PetscCheckFalse(!iscol_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3253 3254 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3255 PetscCheckFalse(!iscol_o,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3256 3257 /* Update diagonal and off-diagonal portions of submat */ 3258 asub = (Mat_MPIAIJ*)(*submat)->data; 3259 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3260 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3261 if (n) { 3262 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3263 } 3264 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3265 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3266 3267 } else { /* call == MAT_INITIAL_MATRIX) */ 3268 const PetscInt *garray; 3269 PetscInt BsubN; 3270 3271 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
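      garray is allocated inside ISGetSeqIS_SameColDist_Private() and is freed with PetscFree()
      below, once the submatrix M has been created.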
*/ 3272 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3273 3274 /* Create local submatrices Asub and Bsub */ 3275 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3276 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3277 3278 /* Create submatrix M */ 3279 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3280 3281 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3282 asub = (Mat_MPIAIJ*)M->data; 3283 3284 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3285 n = asub->B->cmap->N; 3286 if (BsubN > n) { 3287 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3288 const PetscInt *idx; 3289 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3290 ierr = PetscInfo(M,"submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3291 3292 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3293 j = 0; 3294 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3295 for (i=0; i<n; i++) { 3296 if (j >= BsubN) break; 3297 while (subgarray[i] > garray[j]) j++; 3298 3299 if (subgarray[i] == garray[j]) { 3300 idx_new[i] = idx[j++]; 3301 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT,i,subgarray[i],j,garray[j]); 3302 } 3303 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3304 3305 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3306 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3307 3308 } else if (BsubN < n) { 3309 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")",BsubN,asub->B->cmap->N); 3310 } 3311 3312 ierr = PetscFree(garray);CHKERRQ(ierr); 3313 *submat = M; 3314 3315 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3316 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3317 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3318 3319 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3320 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3321 3322 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3323 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3324 } 3325 PetscFunctionReturn(0); 3326 } 3327 3328 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3329 { 3330 PetscErrorCode ierr; 3331 IS iscol_local=NULL,isrow_d; 3332 PetscInt csize; 3333 PetscInt n,i,j,start,end; 3334 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3335 MPI_Comm comm; 3336 3337 PetscFunctionBegin; 3338 /* If isrow has same processor distribution as mat, 3339 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3340 if (call == MAT_REUSE_MATRIX) { 3341 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3342 if (isrow_d) { 3343 sameRowDist = PETSC_TRUE; 3344 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3345 } else { 3346 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3347 if (iscol_local) { 3348 sameRowDist = PETSC_TRUE; 3349 tsameDist[1] = PETSC_FALSE; /* 
!sameColDist */ 3350 } 3351 } 3352 } else { 3353 /* Check if isrow has same processor distribution as mat */ 3354 sameDist[0] = PETSC_FALSE; 3355 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3356 if (!n) { 3357 sameDist[0] = PETSC_TRUE; 3358 } else { 3359 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3360 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3361 if (i >= start && j < end) { 3362 sameDist[0] = PETSC_TRUE; 3363 } 3364 } 3365 3366 /* Check if iscol has same processor distribution as mat */ 3367 sameDist[1] = PETSC_FALSE; 3368 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3369 if (!n) { 3370 sameDist[1] = PETSC_TRUE; 3371 } else { 3372 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3373 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3374 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3375 } 3376 3377 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3378 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRMPI(ierr); 3379 sameRowDist = tsameDist[0]; 3380 } 3381 3382 if (sameRowDist) { 3383 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3384 /* isrow and iscol have same processor distribution as mat */ 3385 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3386 PetscFunctionReturn(0); 3387 } else { /* sameRowDist */ 3388 /* isrow has same processor distribution as mat */ 3389 if (call == MAT_INITIAL_MATRIX) { 3390 PetscBool sorted; 3391 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3392 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3393 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3394 PetscCheckFalse(n != i,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT,n,i); 3395 3396 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3397 if (sorted) { 3398 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3399 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3400 PetscFunctionReturn(0); 3401 } 3402 } else { /* call == MAT_REUSE_MATRIX */ 3403 IS iscol_sub; 3404 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3405 if (iscol_sub) { 3406 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3407 PetscFunctionReturn(0); 3408 } 3409 } 3410 } 3411 } 3412 3413 /* General case: iscol -> iscol_local which has global size of iscol */ 3414 if (call == MAT_REUSE_MATRIX) { 3415 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3416 PetscCheckFalse(!iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3417 } else { 3418 if (!iscol_local) { 3419 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3420 } 3421 } 3422 3423 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3424 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3425 3426 if (call == MAT_INITIAL_MATRIX) { 3427 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3428 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3429 } 3430 PetscFunctionReturn(0); 3431 } 3432 3433 /*@C 3434 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 
3435 and "off-diagonal" part of the matrix in CSR format. 3436 3437 Collective 3438 3439 Input Parameters: 3440 + comm - MPI communicator 3441 . A - "diagonal" portion of matrix 3442 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3443 - garray - global index of B columns 3444 3445 Output Parameter: 3446 . mat - the matrix, with input A as its local diagonal matrix 3447 Level: advanced 3448 3449 Notes: 3450 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3451 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3452 3453 .seealso: MatCreateMPIAIJWithSplitArrays() 3454 @*/ 3455 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3456 { 3457 PetscErrorCode ierr; 3458 Mat_MPIAIJ *maij; 3459 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3460 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3461 const PetscScalar *oa; 3462 Mat Bnew; 3463 PetscInt m,n,N; 3464 3465 PetscFunctionBegin; 3466 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3467 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3468 PetscCheckFalse(m != B->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %" PetscInt_FMT " != Bm %" PetscInt_FMT,m,B->rmap->N); 3469 PetscCheckFalse(A->rmap->bs != B->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT,A->rmap->bs,B->rmap->bs); 3470 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3471 /* PetscCheckFalse(A->cmap->bs != B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */ 3472 3473 /* Get global columns of mat */ 3474 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3475 3476 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3477 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3478 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3479 maij = (Mat_MPIAIJ*)(*mat)->data; 3480 3481 (*mat)->preallocated = PETSC_TRUE; 3482 3483 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3484 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3485 3486 /* Set A as diagonal portion of *mat */ 3487 maij->A = A; 3488 3489 nz = oi[m]; 3490 for (i=0; i<nz; i++) { 3491 col = oj[i]; 3492 oj[i] = garray[col]; 3493 } 3494 3495 /* Set Bnew as off-diagonal portion of *mat */ 3496 ierr = MatSeqAIJGetArrayRead(B,&oa);CHKERRQ(ierr); 3497 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew);CHKERRQ(ierr); 3498 ierr = MatSeqAIJRestoreArrayRead(B,&oa);CHKERRQ(ierr); 3499 bnew = (Mat_SeqAIJ*)Bnew->data; 3500 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3501 maij->B = Bnew; 3502 3503 PetscCheckFalse(B->rmap->N != Bnew->rmap->N,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT,B->rmap->N,Bnew->rmap->N); 3504 3505 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3506 b->free_a = PETSC_FALSE; 3507 b->free_ij = PETSC_FALSE; 3508 ierr = MatDestroy(&B);CHKERRQ(ierr); 3509 3510 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3511 bnew->free_a = PETSC_TRUE; 3512 bnew->free_ij = PETSC_TRUE; 3513 3514 /* condense columns of maij->B */ 3515 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3516 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3517 
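/* MatAssemblyEnd() below is what actually compacts the column space of maij->B and builds the
   communication data (garray, lvec, Mvctx) needed for MatMult() on the new parallel matrix */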
ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3518 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3519 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3520 PetscFunctionReturn(0); 3521 } 3522 3523 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3524 3525 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3526 { 3527 PetscErrorCode ierr; 3528 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3529 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3530 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3531 Mat M,Msub,B=a->B; 3532 MatScalar *aa; 3533 Mat_SeqAIJ *aij; 3534 PetscInt *garray = a->garray,*colsub,Ncols; 3535 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3536 IS iscol_sub,iscmap; 3537 const PetscInt *is_idx,*cmap; 3538 PetscBool allcolumns=PETSC_FALSE; 3539 MPI_Comm comm; 3540 3541 PetscFunctionBegin; 3542 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3543 if (call == MAT_REUSE_MATRIX) { 3544 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3545 PetscCheckFalse(!iscol_sub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3546 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3547 3548 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3549 PetscCheckFalse(!iscmap,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3550 3551 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3552 PetscCheckFalse(!Msub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3553 3554 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3555 3556 } else { /* call == MAT_INITIAL_MATRIX) */ 3557 PetscBool flg; 3558 3559 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3560 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3561 3562 /* (1) iscol -> nonscalable iscol_local */ 3563 /* Check for special case: each processor gets entire matrix columns */ 3564 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3565 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3566 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 3567 if (allcolumns) { 3568 iscol_sub = iscol_local; 3569 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3570 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3571 3572 } else { 3573 /* (2) iscol_local -> iscol_sub and iscmap. 
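       iscol_sub collects the global indices of the selected columns visible to this process
       (those in its own column ownership range plus those present in garray), while iscmap
       records each kept column's position in the submatrix column numbering.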
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3574 PetscInt *idx,*cmap1,k; 3575 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3576 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3577 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3578 count = 0; 3579 k = 0; 3580 for (i=0; i<Ncols; i++) { 3581 j = is_idx[i]; 3582 if (j >= cstart && j < cend) { 3583 /* diagonal part of mat */ 3584 idx[count] = j; 3585 cmap1[count++] = i; /* column index in submat */ 3586 } else if (Bn) { 3587 /* off-diagonal part of mat */ 3588 if (j == garray[k]) { 3589 idx[count] = j; 3590 cmap1[count++] = i; /* column index in submat */ 3591 } else if (j > garray[k]) { 3592 while (j > garray[k] && k < Bn-1) k++; 3593 if (j == garray[k]) { 3594 idx[count] = j; 3595 cmap1[count++] = i; /* column index in submat */ 3596 } 3597 } 3598 } 3599 } 3600 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3601 3602 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3603 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3604 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3605 3606 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3607 } 3608 3609 /* (3) Create sequential Msub */ 3610 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3611 } 3612 3613 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3614 aij = (Mat_SeqAIJ*)(Msub)->data; 3615 ii = aij->i; 3616 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3617 3618 /* 3619 m - number of local rows 3620 Ncols - number of columns (same on all processors) 3621 rstart - first row in new global matrix generated 3622 */ 3623 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3624 3625 if (call == MAT_INITIAL_MATRIX) { 3626 /* (4) Create parallel newmat */ 3627 PetscMPIInt rank,size; 3628 PetscInt csize; 3629 3630 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 3631 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 3632 3633 /* 3634 Determine the number of non-zeros in the diagonal and off-diagonal 3635 portions of the matrix in order to do correct preallocation 3636 */ 3637 3638 /* first get start and end of "diagonal" columns */ 3639 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3640 if (csize == PETSC_DECIDE) { 3641 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3642 if (mglobal == Ncols) { /* square matrix */ 3643 nlocal = m; 3644 } else { 3645 nlocal = Ncols/size + ((Ncols % size) > rank); 3646 } 3647 } else { 3648 nlocal = csize; 3649 } 3650 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3651 rstart = rend - nlocal; 3652 PetscCheckFalse(rank == size - 1 && rend != Ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,Ncols); 3653 3654 /* next, compute all the lengths */ 3655 jj = aij->j; 3656 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3657 olens = dlens + m; 3658 for (i=0; i<m; i++) { 3659 jend = ii[i+1] - ii[i]; 3660 olen = 0; 3661 dlen = 0; 3662 for (j=0; j<jend; j++) { 3663 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3664 else dlen++; 3665 jj++; 3666 } 3667 olens[i] = olen; 3668 dlens[i] = dlen; 3669 } 3670 3671 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3672 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3673 3674 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3675 ierr = 
MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 3676 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3677 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3678 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3679 ierr = PetscFree(dlens);CHKERRQ(ierr); 3680 3681 } else { /* call == MAT_REUSE_MATRIX */ 3682 M = *newmat; 3683 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3684 PetscCheckFalse(i != m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3685 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3686 /* 3687 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3688 rather than the slower MatSetValues(). 3689 */ 3690 M->was_assembled = PETSC_TRUE; 3691 M->assembled = PETSC_FALSE; 3692 } 3693 3694 /* (5) Set values of Msub to *newmat */ 3695 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3696 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3697 3698 jj = aij->j; 3699 ierr = MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr); 3700 for (i=0; i<m; i++) { 3701 row = rstart + i; 3702 nz = ii[i+1] - ii[i]; 3703 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3704 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3705 jj += nz; aa += nz; 3706 } 3707 ierr = MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr); 3708 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3709 3710 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3711 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3712 3713 ierr = PetscFree(colsub);CHKERRQ(ierr); 3714 3715 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3716 if (call == MAT_INITIAL_MATRIX) { 3717 *newmat = M; 3718 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3719 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3720 3721 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3722 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3723 3724 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3725 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3726 3727 if (iscol_local) { 3728 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3729 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3730 } 3731 } 3732 PetscFunctionReturn(0); 3733 } 3734 3735 /* 3736 Not great since it makes two copies of the submatrix, first an SeqAIJ 3737 in local and then by concatenating the local matrices the end result. 3738 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3739 3740 Note: This requires a sequential iscol with all indices. 
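          Every process therefore works with an iscol whose length is the global number of
          selected columns, which is why this path is only taken when the cheaper
          MatCreateSubMatrix_MPIAIJ_SameRowColDist()/SameRowDist() routes do not apply.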
3741 */ 3742 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3743 { 3744 PetscErrorCode ierr; 3745 PetscMPIInt rank,size; 3746 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3747 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3748 Mat M,Mreuse; 3749 MatScalar *aa,*vwork; 3750 MPI_Comm comm; 3751 Mat_SeqAIJ *aij; 3752 PetscBool colflag,allcolumns=PETSC_FALSE; 3753 3754 PetscFunctionBegin; 3755 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3756 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 3757 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 3758 3759 /* Check for special case: each processor gets entire matrix columns */ 3760 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3761 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3762 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3763 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 3764 3765 if (call == MAT_REUSE_MATRIX) { 3766 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3767 PetscCheckFalse(!Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3768 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3769 } else { 3770 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3771 } 3772 3773 /* 3774 m - number of local rows 3775 n - number of columns (same on all processors) 3776 rstart - first row in new global matrix generated 3777 */ 3778 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3779 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3780 if (call == MAT_INITIAL_MATRIX) { 3781 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3782 ii = aij->i; 3783 jj = aij->j; 3784 3785 /* 3786 Determine the number of non-zeros in the diagonal and off-diagonal 3787 portions of the matrix in order to do correct preallocation 3788 */ 3789 3790 /* first get start and end of "diagonal" columns */ 3791 if (csize == PETSC_DECIDE) { 3792 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3793 if (mglobal == n) { /* square matrix */ 3794 nlocal = m; 3795 } else { 3796 nlocal = n/size + ((n % size) > rank); 3797 } 3798 } else { 3799 nlocal = csize; 3800 } 3801 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3802 rstart = rend - nlocal; 3803 PetscCheckFalse(rank == size - 1 && rend != n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n); 3804 3805 /* next, compute all the lengths */ 3806 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3807 olens = dlens + m; 3808 for (i=0; i<m; i++) { 3809 jend = ii[i+1] - ii[i]; 3810 olen = 0; 3811 dlen = 0; 3812 for (j=0; j<jend; j++) { 3813 if (*jj < rstart || *jj >= rend) olen++; 3814 else dlen++; 3815 jj++; 3816 } 3817 olens[i] = olen; 3818 dlens[i] = dlen; 3819 } 3820 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3821 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3822 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3823 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3824 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3825 ierr = PetscFree(dlens);CHKERRQ(ierr); 3826 } else { 3827 PetscInt ml,nl; 3828 3829 M = *newmat; 3830 ierr = 
MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3831 PetscCheckFalse(ml != m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3832 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3833 /* 3834 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3835 rather than the slower MatSetValues(). 3836 */ 3837 M->was_assembled = PETSC_TRUE; 3838 M->assembled = PETSC_FALSE; 3839 } 3840 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3841 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3842 ii = aij->i; 3843 jj = aij->j; 3844 3845 /* trigger copy to CPU if needed */ 3846 ierr = MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr); 3847 for (i=0; i<m; i++) { 3848 row = rstart + i; 3849 nz = ii[i+1] - ii[i]; 3850 cwork = jj; jj += nz; 3851 vwork = aa; aa += nz; 3852 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3853 } 3854 ierr = MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr); 3855 3856 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3857 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3858 *newmat = M; 3859 3860 /* save submatrix used in processor for next request */ 3861 if (call == MAT_INITIAL_MATRIX) { 3862 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3863 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3864 } 3865 PetscFunctionReturn(0); 3866 } 3867 3868 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3869 { 3870 PetscInt m,cstart, cend,j,nnz,i,d; 3871 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3872 const PetscInt *JJ; 3873 PetscErrorCode ierr; 3874 PetscBool nooffprocentries; 3875 3876 PetscFunctionBegin; 3877 PetscCheckFalse(Ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %" PetscInt_FMT,Ii[0]); 3878 3879 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3880 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3881 m = B->rmap->n; 3882 cstart = B->cmap->rstart; 3883 cend = B->cmap->rend; 3884 rstart = B->rmap->rstart; 3885 3886 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3887 3888 if (PetscDefined(USE_DEBUG)) { 3889 for (i=0; i<m; i++) { 3890 nnz = Ii[i+1]- Ii[i]; 3891 JJ = J + Ii[i]; 3892 PetscCheckFalse(nnz < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns",i,nnz); 3893 PetscCheckFalse(nnz && (JJ[0] < 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT,i,JJ[0]); 3894 PetscCheckFalse(nnz && (JJ[nnz-1] >= B->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")",i,JJ[nnz-1],B->cmap->N); 3895 } 3896 } 3897 3898 for (i=0; i<m; i++) { 3899 nnz = Ii[i+1]- Ii[i]; 3900 JJ = J + Ii[i]; 3901 nnz_max = PetscMax(nnz_max,nnz); 3902 d = 0; 3903 for (j=0; j<nnz; j++) { 3904 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3905 } 3906 d_nnz[i] = d; 3907 o_nnz[i] = nnz - d; 3908 } 3909 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3910 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3911 3912 for (i=0; i<m; i++) { 3913 ii = i + rstart; 3914 ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? 
v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr); 3915 } 3916 nooffprocentries = B->nooffprocentries; 3917 B->nooffprocentries = PETSC_TRUE; 3918 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3919 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3920 B->nooffprocentries = nooffprocentries; 3921 3922 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3923 PetscFunctionReturn(0); 3924 } 3925 3926 /*@ 3927 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3928 (the default parallel PETSc format). 3929 3930 Collective 3931 3932 Input Parameters: 3933 + B - the matrix 3934 . i - the indices into j for the start of each local row (starts with zero) 3935 . j - the column indices for each local row (starts with zero) 3936 - v - optional values in the matrix 3937 3938 Level: developer 3939 3940 Notes: 3941 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3942 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3943 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3944 3945 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3946 3947 The format which is used for the sparse matrix input, is equivalent to a 3948 row-major ordering.. i.e for the following matrix, the input data expected is 3949 as shown 3950 3951 $ 1 0 0 3952 $ 2 0 3 P0 3953 $ ------- 3954 $ 4 5 6 P1 3955 $ 3956 $ Process0 [P0]: rows_owned=[0,1] 3957 $ i = {0,1,3} [size = nrow+1 = 2+1] 3958 $ j = {0,0,2} [size = 3] 3959 $ v = {1,2,3} [size = 3] 3960 $ 3961 $ Process1 [P1]: rows_owned=[2] 3962 $ i = {0,3} [size = nrow+1 = 1+1] 3963 $ j = {0,1,2} [size = 3] 3964 $ v = {4,5,6} [size = 3] 3965 3966 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3967 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3968 @*/ 3969 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3970 { 3971 PetscErrorCode ierr; 3972 3973 PetscFunctionBegin; 3974 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3975 PetscFunctionReturn(0); 3976 } 3977 3978 /*@C 3979 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3980 (the default parallel PETSc format). For good matrix assembly performance 3981 the user should preallocate the matrix storage by setting the parameters 3982 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3983 performance can be increased by more than a factor of 50. 3984 3985 Collective 3986 3987 Input Parameters: 3988 + B - the matrix 3989 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3990 (same value is used for all local rows) 3991 . d_nnz - array containing the number of nonzeros in the various rows of the 3992 DIAGONAL portion of the local submatrix (possibly different for each row) 3993 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3994 The size of this array is equal to the number of local rows, i.e 'm'. 3995 For matrices that will be factored, you must leave room for (and set) 3996 the diagonal entry even if it is zero. 3997 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3998 submatrix (same value is used for all local rows). 3999 - o_nnz - array containing the number of nonzeros in the various rows of the 4000 OFF-DIAGONAL portion of the local submatrix (possibly different for 4001 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 4002 structure. The size of this array is equal to the number 4003 of local rows, i.e 'm'. 4004 4005 If the *_nnz parameter is given then the *_nz parameter is ignored 4006 4007 The AIJ format (also called the Yale sparse matrix format or 4008 compressed row storage (CSR)), is fully compatible with standard Fortran 77 4009 storage. The stored row and column indices begin with zero. 4010 See Users-Manual: ch_mat for details. 4011 4012 The parallel matrix is partitioned such that the first m0 rows belong to 4013 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4014 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 4015 4016 The DIAGONAL portion of the local submatrix of a processor can be defined 4017 as the submatrix which is obtained by extraction the part corresponding to 4018 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4019 first row that belongs to the processor, r2 is the last row belonging to 4020 the this processor, and c1-c2 is range of indices of the local part of a 4021 vector suitable for applying the matrix to. This is an mxn matrix. In the 4022 common case of a square matrix, the row and column ranges are the same and 4023 the DIAGONAL part is also square. The remaining portion of the local 4024 submatrix (mxN) constitute the OFF-DIAGONAL portion. 4025 4026 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4027 4028 You can call MatGetInfo() to get information on how effective the preallocation was; 4029 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4030 You can also run with the option -info and look for messages with the string 4031 malloc in them to see if additional memory allocation was needed. 4032 4033 Example usage: 4034 4035 Consider the following 8x8 matrix with 34 non-zero values, that is 4036 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4037 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4038 as follows: 4039 4040 .vb 4041 1 2 0 | 0 3 0 | 0 4 4042 Proc0 0 5 6 | 7 0 0 | 8 0 4043 9 0 10 | 11 0 0 | 12 0 4044 ------------------------------------- 4045 13 0 14 | 15 16 17 | 0 0 4046 Proc1 0 18 0 | 19 20 21 | 0 0 4047 0 0 0 | 22 23 0 | 24 0 4048 ------------------------------------- 4049 Proc2 25 26 27 | 0 0 28 | 29 0 4050 30 0 0 | 31 32 33 | 0 34 4051 .ve 4052 4053 This can be represented as a collection of submatrices as: 4054 4055 .vb 4056 A B C 4057 D E F 4058 G H I 4059 .ve 4060 4061 Where the submatrices A,B,C are owned by proc0, D,E,F are 4062 owned by proc1, G,H,I are owned by proc2. 4063 4064 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4065 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4066 The 'M','N' parameters are 8,8, and have the same values on all procs. 4067 4068 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4069 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4070 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4071 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4072 part as SeqAIJ matrices. 
for eg: proc1 will store [E] as a SeqAIJ 4073 matrix, ans [DF] as another SeqAIJ matrix. 4074 4075 When d_nz, o_nz parameters are specified, d_nz storage elements are 4076 allocated for every row of the local diagonal submatrix, and o_nz 4077 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4078 One way to choose d_nz and o_nz is to use the max nonzerors per local 4079 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4080 In this case, the values of d_nz,o_nz are: 4081 .vb 4082 proc0 : dnz = 2, o_nz = 2 4083 proc1 : dnz = 3, o_nz = 2 4084 proc2 : dnz = 1, o_nz = 4 4085 .ve 4086 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4087 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4088 for proc3. i.e we are using 12+15+10=37 storage locations to store 4089 34 values. 4090 4091 When d_nnz, o_nnz parameters are specified, the storage is specified 4092 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4093 In the above case the values for d_nnz,o_nnz are: 4094 .vb 4095 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4096 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4097 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4098 .ve 4099 Here the space allocated is sum of all the above values i.e 34, and 4100 hence pre-allocation is perfect. 4101 4102 Level: intermediate 4103 4104 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4105 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 4106 @*/ 4107 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4108 { 4109 PetscErrorCode ierr; 4110 4111 PetscFunctionBegin; 4112 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4113 PetscValidType(B,1); 4114 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4115 PetscFunctionReturn(0); 4116 } 4117 4118 /*@ 4119 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 4120 CSR format for the local rows. 4121 4122 Collective 4123 4124 Input Parameters: 4125 + comm - MPI communicator 4126 . m - number of local rows (Cannot be PETSC_DECIDE) 4127 . n - This value should be the same as the local size used in creating the 4128 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4129 calculated if N is given) For square matrices n is almost always m. 4130 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4131 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4132 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4133 . j - column indices 4134 - a - matrix values 4135 4136 Output Parameter: 4137 . mat - the matrix 4138 4139 Level: intermediate 4140 4141 Notes: 4142 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4143 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4144 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4145 4146 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4147 4148 The format which is used for the sparse matrix input, is equivalent to a 4149 row-major ordering.. 
i.e for the following matrix, the input data expected is 4150 as shown 4151 4152 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays 4153 4154 $ 1 0 0 4155 $ 2 0 3 P0 4156 $ ------- 4157 $ 4 5 6 P1 4158 $ 4159 $ Process0 [P0]: rows_owned=[0,1] 4160 $ i = {0,1,3} [size = nrow+1 = 2+1] 4161 $ j = {0,0,2} [size = 3] 4162 $ v = {1,2,3} [size = 3] 4163 $ 4164 $ Process1 [P1]: rows_owned=[2] 4165 $ i = {0,3} [size = nrow+1 = 1+1] 4166 $ j = {0,1,2} [size = 3] 4167 $ v = {4,5,6} [size = 3] 4168 4169 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4170 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4171 @*/ 4172 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4173 { 4174 PetscErrorCode ierr; 4175 4176 PetscFunctionBegin; 4177 PetscCheckFalse(i && i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4178 PetscCheckFalse(m < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4179 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4180 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4181 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4182 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4183 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4184 PetscFunctionReturn(0); 4185 } 4186 4187 /*@ 4188 MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard 4189 CSR format for the local rows. Only the numerical values are updated the other arrays must be identical 4190 4191 Collective 4192 4193 Input Parameters: 4194 + mat - the matrix 4195 . m - number of local rows (Cannot be PETSC_DECIDE) 4196 . n - This value should be the same as the local size used in creating the 4197 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4198 calculated if N is given) For square matrices n is almost always m. 4199 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4200 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4201 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4202 . 
J - column indices 4203 - v - matrix values 4204 4205 Level: intermediate 4206 4207 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4208 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4209 @*/ 4210 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4211 { 4212 PetscErrorCode ierr; 4213 PetscInt cstart,nnz,i,j; 4214 PetscInt *ld; 4215 PetscBool nooffprocentries; 4216 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4217 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data; 4218 PetscScalar *ad,*ao; 4219 const PetscInt *Adi = Ad->i; 4220 PetscInt ldi,Iii,md; 4221 4222 PetscFunctionBegin; 4223 PetscCheckFalse(Ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4224 PetscCheckFalse(m < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4225 PetscCheckFalse(m != mat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4226 PetscCheckFalse(n != mat->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4227 4228 ierr = MatSeqAIJGetArrayWrite(Aij->A,&ad);CHKERRQ(ierr); 4229 ierr = MatSeqAIJGetArrayWrite(Aij->B,&ao);CHKERRQ(ierr); 4230 cstart = mat->cmap->rstart; 4231 if (!Aij->ld) { 4232 /* count number of entries below block diagonal */ 4233 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 4234 Aij->ld = ld; 4235 for (i=0; i<m; i++) { 4236 nnz = Ii[i+1]- Ii[i]; 4237 j = 0; 4238 while (J[j] < cstart && j < nnz) {j++;} 4239 J += nnz; 4240 ld[i] = j; 4241 } 4242 } else { 4243 ld = Aij->ld; 4244 } 4245 4246 for (i=0; i<m; i++) { 4247 nnz = Ii[i+1]- Ii[i]; 4248 Iii = Ii[i]; 4249 ldi = ld[i]; 4250 md = Adi[i+1]-Adi[i]; 4251 ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr); 4252 ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr); 4253 ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr); 4254 ad += md; 4255 ao += nnz - md; 4256 } 4257 nooffprocentries = mat->nooffprocentries; 4258 mat->nooffprocentries = PETSC_TRUE; 4259 ierr = MatSeqAIJRestoreArrayWrite(Aij->A,&ad);CHKERRQ(ierr); 4260 ierr = MatSeqAIJRestoreArrayWrite(Aij->B,&ao);CHKERRQ(ierr); 4261 ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr); 4262 ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr); 4263 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 4264 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4265 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4266 mat->nooffprocentries = nooffprocentries; 4267 PetscFunctionReturn(0); 4268 } 4269 4270 /*@C 4271 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4272 (the default parallel PETSc format). For good matrix assembly performance 4273 the user should preallocate the matrix storage by setting the parameters 4274 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4275 performance can be increased by more than a factor of 50. 4276 4277 Collective 4278 4279 Input Parameters: 4280 + comm - MPI communicator 4281 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4282 This value should be the same as the local size used in creating the 4283 y vector for the matrix-vector product y = Ax. 4284 . 
n - This value should be the same as the local size used in creating the 4285 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have 4286 it calculated if N is given). For square matrices n is almost always m. 4287
. M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 4288
. N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 4289
. d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4290 (same value is used for all local rows) 4291
. d_nnz - array containing the number of nonzeros in the various rows of the 4292 DIAGONAL portion of the local submatrix (possibly different for each row) 4293 or NULL, if d_nz is used to specify the nonzero structure. 4294 The size of this array is equal to the number of local rows, i.e., 'm'. 4295
. o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4296 submatrix (same value is used for all local rows). 4297
- o_nnz - array containing the number of nonzeros in the various rows of the 4298 OFF-DIAGONAL portion of the local submatrix (possibly different for 4299 each row) or NULL, if o_nz is used to specify the nonzero 4300 structure. The size of this array is equal to the number 4301 of local rows, i.e., 'm'. 4302 4303
Output Parameter: 4304
. A - the matrix 4305 4306
It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4307 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4308 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()] 4309 4310
Notes: 4311
If the *_nnz parameter is given then the *_nz parameter is ignored. 4312 4313
The m,n,M,N parameters specify the size of the matrix, and its partitioning across 4314 processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4315 storage requirements for this matrix. 4316 4317
If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4318 processor then it must be used on all processors that share the object for 4319 that argument. 4320 4321
The user MUST specify either the local or global matrix dimensions 4322 (possibly both). 4323 4324
The parallel matrix is partitioned across processors such that the 4325 first m0 rows belong to process 0, the next m1 rows belong to 4326 process 1, the next m2 rows belong to process 2, etc., where 4327 m0,m1,m2,... are the input parameter 'm' on each process; i.e., each processor stores 4328 values corresponding to an [m x N] submatrix. 4329 4330
The columns are logically partitioned with the n0 columns belonging 4331 to the 0th partition, the next n1 columns belonging to the next 4332 partition, etc., where n0,n1,n2,... are the input parameter 'n'. 4333 4334
The DIAGONAL portion of the local submatrix on any given processor 4335 is the submatrix corresponding to the rows and columns m,n 4336 owned by the given processor; i.e., the diagonal submatrix on 4337 process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1], 4338 etc. The remaining portion of the local submatrix, [m x (N-n)], 4339 constitutes the OFF-DIAGONAL portion. The example below better 4340 illustrates this concept. 4341 4342
For a square global matrix we define each processor's diagonal portion 4343 to be its local rows and the corresponding columns (a square submatrix); 4344 each processor's off-diagonal portion encompasses the remainder of the 4345 local matrix (a rectangular submatrix). 4346 4347
If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
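   As a quick check of the resulting row partitioning (a minimal sketch, not part of the construction itself), the locally owned row range can be queried once the matrix has been set up:
.vb
      PetscInt rstart,rend;
      MatGetOwnershipRange(A,&rstart,&rend);  /* this process stores global rows rstart up to rend-1 */
.ve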
4348 4349 When calling this routine with a single process communicator, a matrix of 4350 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4351 type of communicator, use the construction mechanism 4352
.vb 4353 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4354 .ve 4355 4356
By default, this format uses inodes (identical nodes) when possible. 4362 We search for consecutive rows with the same nonzero structure, thereby 4363 reusing matrix information to achieve increased efficiency. 4364 4365
Options Database Keys: 4366
+ -mat_no_inode - Do not use inodes 4367
. -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4368
- -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices. 4369 See viewer types in the manual page of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter to be viewed as a matrix. 4370 Entry (i,j) is the size of the message (in bytes) rank i sends to rank j in one MatMult() call. 4371 4372
Example usage: 4373 4374
Consider the following 8x8 matrix with 34 non-zero values, that is 4375 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4376 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 4377 as follows 4378 4379
.vb 4380
1 2 0 | 0 3 0 | 0 4 4381
Proc0 0 5 6 | 7 0 0 | 8 0 4382
9 0 10 | 11 0 0 | 12 0 4383
------------------------------------- 4384
13 0 14 | 15 16 17 | 0 0 4385
Proc1 0 18 0 | 19 20 21 | 0 0 4386
0 0 0 | 22 23 0 | 24 0 4387
------------------------------------- 4388
Proc2 25 26 27 | 0 0 28 | 29 0 4389
30 0 0 | 31 32 33 | 0 34 4390
.ve 4391 4392
This can be represented as a collection of submatrices as 4393 4394
.vb 4395
A B C 4396
D E F 4397
G H I 4398
.ve 4399 4400
The submatrices A,B,C are owned by proc0, D,E,F are 4401 owned by proc1, and G,H,I are owned by proc2. 4402 4403
The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4404 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4405 The 'M','N' parameters are 8,8, and have the same values on all procs. 4406 4407
The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4408 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4409 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4410 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 4411 part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ 4412 matrix and [DF] as another SeqAIJ matrix. 4413 4414
When the d_nz, o_nz parameters are specified, d_nz storage elements are 4415 allocated for every row of the local diagonal submatrix, and o_nz 4416 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4417 One way to choose d_nz and o_nz is to use the maximum number of nonzeros over the local 4418 rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4419 In this case, the values of d_nz,o_nz are 4420
.vb 4421
proc0 : d_nz = 2, o_nz = 2 4422
proc1 : d_nz = 3, o_nz = 2 4423
proc2 : d_nz = 1, o_nz = 4 4424
.ve 4425
We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4426 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 4427 for proc2; i.e., we are using 12+15+10=37 storage locations to store 4428 34 values.
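   With these values, the corresponding calls on each process would look roughly as follows (a sketch for this example only; error checking omitted):
.vb
      MatCreateAIJ(comm,3,3,8,8,2,NULL,2,NULL,&A);   /* proc0 */
      MatCreateAIJ(comm,3,3,8,8,3,NULL,2,NULL,&A);   /* proc1 */
      MatCreateAIJ(comm,2,2,8,8,1,NULL,4,NULL,&A);   /* proc2 */
.ve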
4429 4430 When the d_nnz, o_nnz parameters are specified, the storage is specified 4431 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 4432 In the above case the values for d_nnz,o_nnz are 4433
.vb 4434
proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4435
proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4436
proc2: d_nnz = [1,1] and o_nnz = [4,4] 4437
.ve 4438
Here the space allocated is the sum of all the above values, i.e., 34, and 4439 hence the preallocation is perfect. 4440 4441
Level: intermediate 4442 4443
.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4444 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4445
@*/ 4446
PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4447
{ 4448
PetscErrorCode ierr; 4449
PetscMPIInt size; 4450 4451
PetscFunctionBegin; 4452
ierr = MatCreate(comm,A);CHKERRQ(ierr); 4453
ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4454
ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4455
if (size > 1) { 4456
ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4457
ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4458
} else { 4459
ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4460
ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4461
} 4462
PetscFunctionReturn(0); 4463
} 4464 4465
/*@C 4466
MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4467 4468
Not Collective 4469 4470
Input Parameter: 4471
. A - The MPIAIJ matrix 4472 4473
Output Parameters: 4474
+ Ad - The local diagonal block as a SeqAIJ matrix 4475
. Ao - The local off-diagonal block as a SeqAIJ matrix 4476
- colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4477 4478
Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns 4479 in Ad are in [0, Nc), where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is 4480 the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these 4481 local column numbers to global column numbers in the original matrix.
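   A minimal usage sketch (the variable names are illustrative only):
.vb
      Mat            Ad,Ao;
      const PetscInt *colmap;
      MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
      /* local column j of Ao corresponds to global column colmap[j] of A */
.ve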
4482 4483 Level: intermediate 4484 4485 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ 4486 @*/ 4487 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4488 { 4489 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4490 PetscBool flg; 4491 PetscErrorCode ierr; 4492 4493 PetscFunctionBegin; 4494 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4495 PetscCheckFalse(!flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4496 if (Ad) *Ad = a->A; 4497 if (Ao) *Ao = a->B; 4498 if (colmap) *colmap = a->garray; 4499 PetscFunctionReturn(0); 4500 } 4501 4502 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4503 { 4504 PetscErrorCode ierr; 4505 PetscInt m,N,i,rstart,nnz,Ii; 4506 PetscInt *indx; 4507 PetscScalar *values; 4508 MatType rootType; 4509 4510 PetscFunctionBegin; 4511 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4512 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4513 PetscInt *dnz,*onz,sum,bs,cbs; 4514 4515 if (n == PETSC_DECIDE) { 4516 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4517 } 4518 /* Check sum(n) = N */ 4519 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 4520 PetscCheckFalse(sum != N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N); 4521 4522 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 4523 rstart -= m; 4524 4525 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4526 for (i=0; i<m; i++) { 4527 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4528 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4529 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4530 } 4531 4532 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4533 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4534 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4535 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4536 ierr = MatGetRootType_Private(inmat,&rootType);CHKERRQ(ierr); 4537 ierr = MatSetType(*outmat,rootType);CHKERRQ(ierr); 4538 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4539 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4540 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4541 ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 4542 } 4543 4544 /* numeric phase */ 4545 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4546 for (i=0; i<m; i++) { 4547 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4548 Ii = i + rstart; 4549 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4550 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4551 } 4552 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4553 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4554 PetscFunctionReturn(0); 4555 } 4556 4557 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4558 { 4559 PetscErrorCode ierr; 4560 PetscMPIInt rank; 4561 PetscInt m,N,i,rstart,nnz; 4562 size_t len; 4563 const PetscInt *indx; 4564 PetscViewer out; 4565 char *name; 4566 Mat B; 4567 const PetscScalar *values; 4568 4569 PetscFunctionBegin; 4570 ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr); 4571 ierr = 
MatGetSize(A,NULL,&N);CHKERRQ(ierr); 4572 /* Should this be the type of the diagonal block of A? */ 4573 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4574 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4575 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4576 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4577 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4578 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 4579 for (i=0; i<m; i++) { 4580 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4581 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4582 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4583 } 4584 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4585 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4586 4587 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr); 4588 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4589 ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr); 4590 ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr); 4591 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4592 ierr = PetscFree(name);CHKERRQ(ierr); 4593 ierr = MatView(B,out);CHKERRQ(ierr); 4594 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4595 ierr = MatDestroy(&B);CHKERRQ(ierr); 4596 PetscFunctionReturn(0); 4597 } 4598 4599 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4600 { 4601 PetscErrorCode ierr; 4602 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4603 4604 PetscFunctionBegin; 4605 if (!merge) PetscFunctionReturn(0); 4606 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4607 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4608 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4609 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4610 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4611 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4612 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4613 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4614 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4615 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4616 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4617 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4618 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4619 ierr = PetscFree(merge);CHKERRQ(ierr); 4620 PetscFunctionReturn(0); 4621 } 4622 4623 #include <../src/mat/utils/freespace.h> 4624 #include <petscbt.h> 4625 4626 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4627 { 4628 PetscErrorCode ierr; 4629 MPI_Comm comm; 4630 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4631 PetscMPIInt size,rank,taga,*len_s; 4632 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4633 PetscInt proc,m; 4634 PetscInt **buf_ri,**buf_rj; 4635 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4636 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4637 MPI_Request *s_waits,*r_waits; 4638 MPI_Status *status; 4639 const MatScalar *aa,*a_a; 4640 MatScalar **abuf_r,*ba_i; 4641 Mat_Merge_SeqsToMPI *merge; 4642 PetscContainer container; 4643 4644 PetscFunctionBegin; 4645 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4646 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4647 4648 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4649 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 4650 4651 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4652 
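/* the numeric phase relies on the merge context (row layout, message lengths and the received ij-structure) that MatCreateMPIAIJSumSeqAIJSymbolic() composed with mpimat; without it we cannot proceed */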
PetscCheckFalse(!container,PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4653 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4654 ierr = MatSeqAIJGetArrayRead(seqmat,&a_a);CHKERRQ(ierr); 4655 aa = a_a; 4656 4657 bi = merge->bi; 4658 bj = merge->bj; 4659 buf_ri = merge->buf_ri; 4660 buf_rj = merge->buf_rj; 4661 4662 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4663 owners = merge->rowmap->range; 4664 len_s = merge->len_s; 4665 4666 /* send and recv matrix values */ 4667 /*-----------------------------*/ 4668 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4669 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4670 4671 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4672 for (proc=0,k=0; proc<size; proc++) { 4673 if (!len_s[proc]) continue; 4674 i = owners[proc]; 4675 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRMPI(ierr); 4676 k++; 4677 } 4678 4679 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRMPI(ierr);} 4680 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRMPI(ierr);} 4681 ierr = PetscFree(status);CHKERRQ(ierr); 4682 4683 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4684 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4685 4686 /* insert mat values of mpimat */ 4687 /*----------------------------*/ 4688 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4689 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4690 4691 for (k=0; k<merge->nrecv; k++) { 4692 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4693 nrows = *(buf_ri_k[k]); 4694 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4695 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4696 } 4697 4698 /* set values of ba */ 4699 m = merge->rowmap->n; 4700 for (i=0; i<m; i++) { 4701 arow = owners[rank] + i; 4702 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4703 bnzi = bi[i+1] - bi[i]; 4704 ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr); 4705 4706 /* add local non-zero vals of this proc's seqmat into ba */ 4707 anzi = ai[arow+1] - ai[arow]; 4708 aj = a->j + ai[arow]; 4709 aa = a_a + ai[arow]; 4710 nextaj = 0; 4711 for (j=0; nextaj<anzi; j++) { 4712 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4713 ba_i[j] += aa[nextaj++]; 4714 } 4715 } 4716 4717 /* add received vals into ba */ 4718 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4719 /* i-th row */ 4720 if (i == *nextrow[k]) { 4721 anzi = *(nextai[k]+1) - *nextai[k]; 4722 aj = buf_rj[k] + *(nextai[k]); 4723 aa = abuf_r[k] + *(nextai[k]); 4724 nextaj = 0; 4725 for (j=0; nextaj<anzi; j++) { 4726 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4727 ba_i[j] += aa[nextaj++]; 4728 } 4729 } 4730 nextrow[k]++; nextai[k]++; 4731 } 4732 } 4733 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4734 } 4735 ierr = MatSeqAIJRestoreArrayRead(seqmat,&a_a);CHKERRQ(ierr); 4736 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4737 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4738 4739 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4740 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4741 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4742 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4743 ierr = 
PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4744 PetscFunctionReturn(0); 4745 } 4746 4747 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4748 { 4749 PetscErrorCode ierr; 4750 Mat B_mpi; 4751 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4752 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4753 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4754 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4755 PetscInt len,proc,*dnz,*onz,bs,cbs; 4756 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4757 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4758 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4759 MPI_Status *status; 4760 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4761 PetscBT lnkbt; 4762 Mat_Merge_SeqsToMPI *merge; 4763 PetscContainer container; 4764 4765 PetscFunctionBegin; 4766 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4767 4768 /* make sure it is a PETSc comm */ 4769 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4770 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4771 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 4772 4773 ierr = PetscNew(&merge);CHKERRQ(ierr); 4774 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4775 4776 /* determine row ownership */ 4777 /*---------------------------------------------------------*/ 4778 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4779 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4780 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4781 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4782 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4783 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4784 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4785 4786 m = merge->rowmap->n; 4787 owners = merge->rowmap->range; 4788 4789 /* determine the number of messages to send, their lengths */ 4790 /*---------------------------------------------------------*/ 4791 len_s = merge->len_s; 4792 4793 len = 0; /* length of buf_si[] */ 4794 merge->nsend = 0; 4795 for (proc=0; proc<size; proc++) { 4796 len_si[proc] = 0; 4797 if (proc == rank) { 4798 len_s[proc] = 0; 4799 } else { 4800 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4801 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4802 } 4803 if (len_s[proc]) { 4804 merge->nsend++; 4805 nrows = 0; 4806 for (i=owners[proc]; i<owners[proc+1]; i++) { 4807 if (ai[i+1] > ai[i]) nrows++; 4808 } 4809 len_si[proc] = 2*(nrows+1); 4810 len += len_si[proc]; 4811 } 4812 } 4813 4814 /* determine the number and length of messages to receive for ij-structure */ 4815 /*-------------------------------------------------------------------------*/ 4816 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4817 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4818 4819 /* post the Irecv of j-structure */ 4820 /*-------------------------------*/ 4821 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4822 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4823 4824 /* post the Isend of j-structure */ 4825 /*--------------------------------*/ 4826 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4827 4828 for (proc=0, k=0; proc<size; proc++) { 4829 
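/* post the sends of the column indices (aj) for the rows that rank proc will own; ranks that receive nothing are skipped */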
if (!len_s[proc]) continue; 4830 i = owners[proc]; 4831 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRMPI(ierr); 4832 k++; 4833 } 4834 4835 /* receives and sends of j-structure are complete */ 4836 /*------------------------------------------------*/ 4837 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRMPI(ierr);} 4838 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRMPI(ierr);} 4839 4840 /* send and recv i-structure */ 4841 /*---------------------------*/ 4842 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4843 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4844 4845 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4846 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4847 for (proc=0,k=0; proc<size; proc++) { 4848 if (!len_s[proc]) continue; 4849 /* form outgoing message for i-structure: 4850 buf_si[0]: nrows to be sent 4851 [1:nrows]: row index (global) 4852 [nrows+1:2*nrows+1]: i-structure index 4853 */ 4854 /*-------------------------------------------*/ 4855 nrows = len_si[proc]/2 - 1; 4856 buf_si_i = buf_si + nrows+1; 4857 buf_si[0] = nrows; 4858 buf_si_i[0] = 0; 4859 nrows = 0; 4860 for (i=owners[proc]; i<owners[proc+1]; i++) { 4861 anzi = ai[i+1] - ai[i]; 4862 if (anzi) { 4863 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4864 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4865 nrows++; 4866 } 4867 } 4868 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRMPI(ierr); 4869 k++; 4870 buf_si += len_si[proc]; 4871 } 4872 4873 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRMPI(ierr);} 4874 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRMPI(ierr);} 4875 4876 ierr = PetscInfo(seqmat,"nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4877 for (i=0; i<merge->nrecv; i++) { 4878 ierr = PetscInfo(seqmat,"recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4879 } 4880 4881 ierr = PetscFree(len_si);CHKERRQ(ierr); 4882 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4883 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4884 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4885 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4886 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4887 ierr = PetscFree(status);CHKERRQ(ierr); 4888 4889 /* compute a local seq matrix in each processor */ 4890 /*----------------------------------------------*/ 4891 /* allocate bi array and free space for accumulating nonzero column info */ 4892 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4893 bi[0] = 0; 4894 4895 /* create and initialize a linked list */ 4896 nlnk = N+1; 4897 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4898 4899 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4900 len = ai[owners[rank+1]] - ai[owners[rank]]; 4901 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4902 4903 current_space = free_space; 4904 4905 /* determine symbolic info for each local row */ 4906 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4907 4908 for (k=0; k<merge->nrecv; k++) { 4909 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4910 nrows = *buf_ri_k[k]; 4911 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4912 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next 
i-structure of k-th recved i-structure */ 4913
} 4914 4915
ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4916
len = 0; 4917
for (i=0; i<m; i++) { 4918
bnzi = 0; 4919
/* add local non-zero cols of this proc's seqmat into lnk */ 4920
arow = owners[rank] + i; 4921
anzi = ai[arow+1] - ai[arow]; 4922
aj = a->j + ai[arow]; 4923
ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4924
bnzi += nlnk; 4925
/* add received col data into lnk */ 4926
for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4927
if (i == *nextrow[k]) { /* i-th row */ 4928
anzi = *(nextai[k]+1) - *nextai[k]; 4929
aj = buf_rj[k] + *nextai[k]; 4930
ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4931
bnzi += nlnk; 4932
nextrow[k]++; nextai[k]++; 4933
} 4934
} 4935
if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4936 4937
/* if free space is not available, make more free space */ 4938
if (current_space->local_remaining<bnzi) { 4939
ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr); 4940
nspacedouble++; 4941
} 4942
/* copy data into free space, then initialize lnk */ 4943
ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4944
ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4945 4946
current_space->array += bnzi; 4947
current_space->local_used += bnzi; 4948
current_space->local_remaining -= bnzi; 4949 4950
bi[i+1] = bi[i] + bnzi; 4951
} 4952 4953
ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4954 4955
ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4956
ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4957
ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4958 4959
/* create symbolic parallel matrix B_mpi */ 4960
/*---------------------------------------*/ 4961
ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4962
ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4963
if (n==PETSC_DECIDE) { 4964
ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4965
} else { 4966
ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4967
} 4968
ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4969
ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4970
ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4971
ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4972
ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4973 4974
/* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4975
B_mpi->assembled = PETSC_FALSE; 4976
merge->bi = bi; 4977
merge->bj = bj; 4978
merge->buf_ri = buf_ri; 4979
merge->buf_rj = buf_rj; 4980
merge->coi = NULL; 4981
merge->coj = NULL; 4982
merge->owners_co = NULL; 4983 4984
ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4985 4986
/* attach the supporting struct to B_mpi for reuse */ 4987
ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4988
ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4989
ierr = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr); 4990
ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4991
ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4992
*mpimat = B_mpi; 4993 4994
ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4995
PetscFunctionReturn(0); 4996
} 4997 4998
/*@C 4999
MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by
adding sequential 5000 matrices from each processor 5001 5002 Collective 5003 5004 Input Parameters: 5005 + comm - the communicators the parallel matrix will live on 5006 . seqmat - the input sequential matrices 5007 . m - number of local rows (or PETSC_DECIDE) 5008 . n - number of local columns (or PETSC_DECIDE) 5009 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5010 5011 Output Parameter: 5012 . mpimat - the parallel matrix generated 5013 5014 Level: advanced 5015 5016 Notes: 5017 The dimensions of the sequential matrix in each processor MUST be the same. 5018 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5019 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 5020 @*/ 5021 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 5022 { 5023 PetscErrorCode ierr; 5024 PetscMPIInt size; 5025 5026 PetscFunctionBegin; 5027 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 5028 if (size == 1) { 5029 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5030 if (scall == MAT_INITIAL_MATRIX) { 5031 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 5032 } else { 5033 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5034 } 5035 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5036 PetscFunctionReturn(0); 5037 } 5038 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5039 if (scall == MAT_INITIAL_MATRIX) { 5040 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 5041 } 5042 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 5043 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5044 PetscFunctionReturn(0); 5045 } 5046 5047 /*@ 5048 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5049 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5050 with MatGetSize() 5051 5052 Not Collective 5053 5054 Input Parameters: 5055 + A - the matrix 5056 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5057 5058 Output Parameter: 5059 . A_loc - the local sequential matrix generated 5060 5061 Level: developer 5062 5063 Notes: 5064 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 5065 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 5066 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 5067 modify the values of the returned A_loc. 
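   A typical call sequence is sketched below (illustrative only; error checking omitted):
.vb
      Mat Aloc;
      MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);
      /* ... use Aloc; after the values of A change, refresh it in place ... */
      MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);
      MatDestroy(&Aloc);
.ve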
5068 5069 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge() 5070 @*/ 5071 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5072 { 5073 PetscErrorCode ierr; 5074 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5075 Mat_SeqAIJ *mat,*a,*b; 5076 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5077 const PetscScalar *aa,*ba,*aav,*bav; 5078 PetscScalar *ca,*cam; 5079 PetscMPIInt size; 5080 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5081 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5082 PetscBool match; 5083 5084 PetscFunctionBegin; 5085 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 5086 PetscCheckFalse(!match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5087 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 5088 if (size == 1) { 5089 if (scall == MAT_INITIAL_MATRIX) { 5090 ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr); 5091 *A_loc = mpimat->A; 5092 } else if (scall == MAT_REUSE_MATRIX) { 5093 ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5094 } 5095 PetscFunctionReturn(0); 5096 } 5097 5098 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5099 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5100 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5101 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5102 ierr = MatSeqAIJGetArrayRead(mpimat->A,&aav);CHKERRQ(ierr); 5103 ierr = MatSeqAIJGetArrayRead(mpimat->B,&bav);CHKERRQ(ierr); 5104 aa = aav; 5105 ba = bav; 5106 if (scall == MAT_INITIAL_MATRIX) { 5107 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5108 ci[0] = 0; 5109 for (i=0; i<am; i++) { 5110 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5111 } 5112 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5113 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5114 k = 0; 5115 for (i=0; i<am; i++) { 5116 ncols_o = bi[i+1] - bi[i]; 5117 ncols_d = ai[i+1] - ai[i]; 5118 /* off-diagonal portion of A */ 5119 for (jo=0; jo<ncols_o; jo++) { 5120 col = cmap[*bj]; 5121 if (col >= cstart) break; 5122 cj[k] = col; bj++; 5123 ca[k++] = *ba++; 5124 } 5125 /* diagonal portion of A */ 5126 for (j=0; j<ncols_d; j++) { 5127 cj[k] = cstart + *aj++; 5128 ca[k++] = *aa++; 5129 } 5130 /* off-diagonal portion of A */ 5131 for (j=jo; j<ncols_o; j++) { 5132 cj[k] = cmap[*bj++]; 5133 ca[k++] = *ba++; 5134 } 5135 } 5136 /* put together the new matrix */ 5137 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5138 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5139 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5140 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5141 mat->free_a = PETSC_TRUE; 5142 mat->free_ij = PETSC_TRUE; 5143 mat->nonew = 0; 5144 } else if (scall == MAT_REUSE_MATRIX) { 5145 mat =(Mat_SeqAIJ*)(*A_loc)->data; 5146 ci = mat->i; 5147 cj = mat->j; 5148 ierr = MatSeqAIJGetArrayWrite(*A_loc,&cam);CHKERRQ(ierr); 5149 for (i=0; i<am; i++) { 5150 /* off-diagonal portion of A */ 5151 ncols_o = bi[i+1] - bi[i]; 5152 for (jo=0; jo<ncols_o; jo++) { 5153 col = cmap[*bj]; 5154 if (col >= cstart) break; 5155 *cam++ = *ba++; bj++; 5156 } 5157 /* diagonal portion of A */ 5158 ncols_d = ai[i+1] - ai[i]; 5159 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5160 /* off-diagonal portion of A */ 5161 for (j=jo; j<ncols_o; j++) { 5162 *cam++ = *ba++; bj++; 5163 } 5164 } 5165 ierr = MatSeqAIJRestoreArrayWrite(*A_loc,&cam);CHKERRQ(ierr); 5166 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5167 ierr = MatSeqAIJRestoreArrayRead(mpimat->A,&aav);CHKERRQ(ierr); 5168 ierr = MatSeqAIJRestoreArrayRead(mpimat->B,&bav);CHKERRQ(ierr); 5169 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5170 PetscFunctionReturn(0); 5171 } 5172 5173 /*@ 5174 MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5175 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part 5176 5177 Not Collective 5178 5179 Input Parameters: 5180 + A - the matrix 5181 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5182 5183 Output Parameters: 5184 + glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL) 5185 - A_loc - the local sequential matrix generated 5186 5187 Level: developer 5188 5189 Notes: 5190 This is different from MatMPIAIJGetLocalMat() since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering) 5191 5192 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed() 5193 5194 @*/ 5195 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc) 5196 { 5197 PetscErrorCode ierr; 5198 Mat Ao,Ad; 5199 const PetscInt *cmap; 5200 PetscMPIInt size; 5201 PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*); 5202 5203 PetscFunctionBegin; 5204 ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap);CHKERRQ(ierr); 5205 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 5206 if (size == 1) { 5207 if (scall == MAT_INITIAL_MATRIX) { 5208 ierr = PetscObjectReference((PetscObject)Ad);CHKERRQ(ierr); 5209 *A_loc = Ad; 5210 } else if (scall == MAT_REUSE_MATRIX) { 5211 ierr = MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5212 } 5213 if (glob) { ierr = ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob);CHKERRQ(ierr); } 5214 PetscFunctionReturn(0); 5215 } 5216 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f);CHKERRQ(ierr); 5217 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5218 if (f) { 5219 ierr = (*f)(A,scall,glob,A_loc);CHKERRQ(ierr); 5220 } else { 5221 Mat_SeqAIJ *a = (Mat_SeqAIJ*)Ad->data; 5222 Mat_SeqAIJ *b = (Mat_SeqAIJ*)Ao->data; 5223 Mat_SeqAIJ *c; 5224 PetscInt *ai = a->i, *aj = a->j; 5225 PetscInt *bi = b->i, *bj = b->j; 5226 PetscInt *ci,*cj; 5227 const PetscScalar *aa,*ba; 5228 PetscScalar *ca; 5229 PetscInt i,j,am,dn,on; 5230 5231 ierr = 
MatGetLocalSize(Ad,&am,&dn);CHKERRQ(ierr); 5232 ierr = MatGetLocalSize(Ao,NULL,&on);CHKERRQ(ierr); 5233 ierr = MatSeqAIJGetArrayRead(Ad,&aa);CHKERRQ(ierr); 5234 ierr = MatSeqAIJGetArrayRead(Ao,&ba);CHKERRQ(ierr); 5235 if (scall == MAT_INITIAL_MATRIX) { 5236 PetscInt k; 5237 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5238 ierr = PetscMalloc1(ai[am]+bi[am],&cj);CHKERRQ(ierr); 5239 ierr = PetscMalloc1(ai[am]+bi[am],&ca);CHKERRQ(ierr); 5240 ci[0] = 0; 5241 for (i=0,k=0; i<am; i++) { 5242 const PetscInt ncols_o = bi[i+1] - bi[i]; 5243 const PetscInt ncols_d = ai[i+1] - ai[i]; 5244 ci[i+1] = ci[i] + ncols_o + ncols_d; 5245 /* diagonal portion of A */ 5246 for (j=0; j<ncols_d; j++,k++) { 5247 cj[k] = *aj++; 5248 ca[k] = *aa++; 5249 } 5250 /* off-diagonal portion of A */ 5251 for (j=0; j<ncols_o; j++,k++) { 5252 cj[k] = dn + *bj++; 5253 ca[k] = *ba++; 5254 } 5255 } 5256 /* put together the new matrix */ 5257 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc);CHKERRQ(ierr); 5258 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5259 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5260 c = (Mat_SeqAIJ*)(*A_loc)->data; 5261 c->free_a = PETSC_TRUE; 5262 c->free_ij = PETSC_TRUE; 5263 c->nonew = 0; 5264 ierr = MatSetType(*A_loc,((PetscObject)Ad)->type_name);CHKERRQ(ierr); 5265 } else if (scall == MAT_REUSE_MATRIX) { 5266 ierr = MatSeqAIJGetArrayWrite(*A_loc,&ca);CHKERRQ(ierr); 5267 for (i=0; i<am; i++) { 5268 const PetscInt ncols_d = ai[i+1] - ai[i]; 5269 const PetscInt ncols_o = bi[i+1] - bi[i]; 5270 /* diagonal portion of A */ 5271 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 5272 /* off-diagonal portion of A */ 5273 for (j=0; j<ncols_o; j++) *ca++ = *ba++; 5274 } 5275 ierr = MatSeqAIJRestoreArrayWrite(*A_loc,&ca);CHKERRQ(ierr); 5276 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5277 ierr = MatSeqAIJRestoreArrayRead(Ad,&aa);CHKERRQ(ierr); 5278 ierr = MatSeqAIJRestoreArrayRead(Ao,&aa);CHKERRQ(ierr); 5279 if (glob) { 5280 PetscInt cst, *gidx; 5281 5282 ierr = MatGetOwnershipRangeColumn(A,&cst,NULL);CHKERRQ(ierr); 5283 ierr = PetscMalloc1(dn+on,&gidx);CHKERRQ(ierr); 5284 for (i=0; i<dn; i++) gidx[i] = cst + i; 5285 for (i=0; i<on; i++) gidx[i+dn] = cmap[i]; 5286 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob);CHKERRQ(ierr); 5287 } 5288 } 5289 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5290 PetscFunctionReturn(0); 5291 } 5292 5293 /*@C 5294 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5295 5296 Not Collective 5297 5298 Input Parameters: 5299 + A - the matrix 5300 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5301 - row, col - index sets of rows and columns to extract (or NULL) 5302 5303 Output Parameter: 5304 . 
A_loc - the local sequential matrix generated 5305 5306 Level: developer 5307 5308 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5309 5310 @*/ 5311 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5312 { 5313 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5314 PetscErrorCode ierr; 5315 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5316 IS isrowa,iscola; 5317 Mat *aloc; 5318 PetscBool match; 5319 5320 PetscFunctionBegin; 5321 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5322 PetscCheckFalse(!match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5323 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5324 if (!row) { 5325 start = A->rmap->rstart; end = A->rmap->rend; 5326 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5327 } else { 5328 isrowa = *row; 5329 } 5330 if (!col) { 5331 start = A->cmap->rstart; 5332 cmap = a->garray; 5333 nzA = a->A->cmap->n; 5334 nzB = a->B->cmap->n; 5335 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5336 ncols = 0; 5337 for (i=0; i<nzB; i++) { 5338 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5339 else break; 5340 } 5341 imark = i; 5342 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5343 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5344 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5345 } else { 5346 iscola = *col; 5347 } 5348 if (scall != MAT_INITIAL_MATRIX) { 5349 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5350 aloc[0] = *A_loc; 5351 } 5352 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5353 if (!col) { /* attach global id of condensed columns */ 5354 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5355 } 5356 *A_loc = aloc[0]; 5357 ierr = PetscFree(aloc);CHKERRQ(ierr); 5358 if (!row) { 5359 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5360 } 5361 if (!col) { 5362 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5363 } 5364 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5365 PetscFunctionReturn(0); 5366 } 5367 5368 /* 5369 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5370 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5371 * on a global size. 
5372 * */ 5373 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5374 { 5375 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5376 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5377 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5378 PetscMPIInt owner; 5379 PetscSFNode *iremote,*oiremote; 5380 const PetscInt *lrowindices; 5381 PetscErrorCode ierr; 5382 PetscSF sf,osf; 5383 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5384 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5385 MPI_Comm comm; 5386 ISLocalToGlobalMapping mapping; 5387 const PetscScalar *pd_a,*po_a; 5388 5389 PetscFunctionBegin; 5390 ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr); 5391 /* plocalsize is the number of roots 5392 * nrows is the number of leaves 5393 * */ 5394 ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr); 5395 ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr); 5396 ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr); 5397 ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr); 5398 for (i=0;i<nrows;i++) { 5399 /* Find a remote index and an owner for a row 5400 * The row could be local or remote 5401 * */ 5402 owner = 0; 5403 lidx = 0; 5404 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr); 5405 iremote[i].index = lidx; 5406 iremote[i].rank = owner; 5407 } 5408 /* Create SF to communicate how many nonzero columns for each row */ 5409 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5410 /* SF will figure out the number of nonzero colunms for each row, and their 5411 * offsets 5412 * */ 5413 ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5414 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5415 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5416 5417 ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr); 5418 ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr); 5419 ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr); 5420 roffsets[0] = 0; 5421 roffsets[1] = 0; 5422 for (i=0;i<plocalsize;i++) { 5423 /* diag */ 5424 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5425 /* off diag */ 5426 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5427 /* compute offsets so that we relative location for each row */ 5428 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5429 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5430 } 5431 ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr); 5432 ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr); 5433 /* 'r' means root, and 'l' means leaf */ 5434 ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr); 5435 ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr); 5436 ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr); 5437 ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr); 5438 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5439 ierr = PetscFree(roffsets);CHKERRQ(ierr); 5440 ierr = PetscFree(nrcols);CHKERRQ(ierr); 5441 dntotalcols = 0; 5442 ontotalcols = 0; 5443 ncol = 0; 5444 for (i=0;i<nrows;i++) { 5445 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5446 ncol = PetscMax(pnnz[i],ncol); 5447 /* diag */ 5448 dntotalcols += nlcols[i*2+0]; 5449 /* off diag */ 5450 ontotalcols += nlcols[i*2+1]; 5451 } 5452 /* We do not need to figure the right number of columns 5453 * since all the calculations will be done by going through the raw data 5454 * */ 5455 ierr = 
MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr); 5456 ierr = MatSetUp(*P_oth);CHKERRQ(ierr); 5457 ierr = PetscFree(pnnz);CHKERRQ(ierr); 5458 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5459 /* diag */ 5460 ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr); 5461 /* off diag */ 5462 ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr); 5463 /* diag */ 5464 ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr); 5465 /* off diag */ 5466 ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr); 5467 dntotalcols = 0; 5468 ontotalcols = 0; 5469 ntotalcols = 0; 5470 for (i=0;i<nrows;i++) { 5471 owner = 0; 5472 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr); 5473 /* Set iremote for diag matrix */ 5474 for (j=0;j<nlcols[i*2+0];j++) { 5475 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5476 iremote[dntotalcols].rank = owner; 5477 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5478 ilocal[dntotalcols++] = ntotalcols++; 5479 } 5480 /* off diag */ 5481 for (j=0;j<nlcols[i*2+1];j++) { 5482 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5483 oiremote[ontotalcols].rank = owner; 5484 oilocal[ontotalcols++] = ntotalcols++; 5485 } 5486 } 5487 ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr); 5488 ierr = PetscFree(loffsets);CHKERRQ(ierr); 5489 ierr = PetscFree(nlcols);CHKERRQ(ierr); 5490 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5491 /* P serves as roots and P_oth is leaves 5492 * Diag matrix 5493 * */ 5494 ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5495 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5496 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5497 5498 ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr); 5499 /* Off diag */ 5500 ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5501 ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr); 5502 ierr = PetscSFSetUp(osf);CHKERRQ(ierr); 5503 ierr = MatSeqAIJGetArrayRead(p->A,&pd_a);CHKERRQ(ierr); 5504 ierr = MatSeqAIJGetArrayRead(p->B,&po_a);CHKERRQ(ierr); 5505 /* We operate on the matrix internal data for saving memory */ 5506 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5507 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5508 ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr); 5509 /* Convert to global indices for diag matrix */ 5510 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5511 ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5512 /* We want P_oth store global indices */ 5513 ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr); 5514 /* Use memory scalable approach */ 5515 ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr); 5516 ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr); 5517 ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5518 ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5519 /* Convert back to local indices */ 5520 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5521 ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5522 nout = 0; 5523 ierr = 
ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr); 5524 PetscCheckFalse(nout != po->i[plocalsize],comm,PETSC_ERR_ARG_INCOMP,"n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ",po->i[plocalsize],nout); 5525 ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr); 5526 /* Exchange values */ 5527 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5528 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5529 ierr = MatSeqAIJRestoreArrayRead(p->A,&pd_a);CHKERRQ(ierr); 5530 ierr = MatSeqAIJRestoreArrayRead(p->B,&po_a);CHKERRQ(ierr); 5531 /* Stop PETSc from shrinking memory */ 5532 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5533 ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5534 ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5535 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5536 ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr); 5537 ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr); 5538 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5539 ierr = PetscSFDestroy(&osf);CHKERRQ(ierr); 5540 PetscFunctionReturn(0); 5541 } 5542 5543 /* 5544 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5545 * This supports MPIAIJ and MAIJ 5546 * */ 5547 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5548 { 5549 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5550 Mat_SeqAIJ *p_oth; 5551 IS rows,map; 5552 PetscHMapI hamp; 5553 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5554 MPI_Comm comm; 5555 PetscSF sf,osf; 5556 PetscBool has; 5557 PetscErrorCode ierr; 5558 5559 PetscFunctionBegin; 5560 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5561 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5562 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5563 * and then create a submatrix (that often is an overlapping matrix) 5564 * */ 5565 if (reuse == MAT_INITIAL_MATRIX) { 5566 /* Use a hash table to figure out unique keys */ 5567 ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr); 5568 ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr); 5569 ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr); 5570 count = 0; 5571 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5572 for (i=0;i<a->B->cmap->n;i++) { 5573 key = a->garray[i]/dof; 5574 ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr); 5575 if (!has) { 5576 mapping[i] = count; 5577 ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr); 5578 } else { 5579 /* Current 'i' has the same value the previous step */ 5580 mapping[i] = count-1; 5581 } 5582 } 5583 ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr); 5584 ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr); 5585 PetscCheckFalse(htsize!=count,comm,PETSC_ERR_ARG_INCOMP," Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ",htsize,count); 5586 ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr); 5587 off = 0; 5588 ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr); 5589 ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr); 5590 ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr); 5591 ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr); 5592 /* In case, 
the matrix was already created but users want to recreate the matrix */ 5593 ierr = MatDestroy(P_oth);CHKERRQ(ierr); 5594 ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr); 5595 ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr); 5596 ierr = ISDestroy(&map);CHKERRQ(ierr); 5597 ierr = ISDestroy(&rows);CHKERRQ(ierr); 5598 } else if (reuse == MAT_REUSE_MATRIX) { 5599 /* If matrix was already created, we simply update values using SF objects 5600 * that as attached to the matrix ealier. 5601 */ 5602 const PetscScalar *pd_a,*po_a; 5603 5604 ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5605 ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5606 PetscCheckFalse(!sf || !osf,comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet"); 5607 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5608 /* Update values in place */ 5609 ierr = MatSeqAIJGetArrayRead(p->A,&pd_a);CHKERRQ(ierr); 5610 ierr = MatSeqAIJGetArrayRead(p->B,&po_a);CHKERRQ(ierr); 5611 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5612 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5613 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5614 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5615 ierr = MatSeqAIJRestoreArrayRead(p->A,&pd_a);CHKERRQ(ierr); 5616 ierr = MatSeqAIJRestoreArrayRead(p->B,&po_a);CHKERRQ(ierr); 5617 } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type"); 5618 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5619 PetscFunctionReturn(0); 5620 } 5621 5622 /*@C 5623 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5624 5625 Collective on Mat 5626 5627 Input Parameters: 5628 + A - the first matrix in mpiaij format 5629 . B - the second matrix in mpiaij format 5630 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5631 5632 Output Parameters: 5633 + rowb - On input index sets of rows of B to extract (or NULL), modified on output 5634 . 
colb - On input index sets of columns of B to extract (or NULL), modified on output 5635 - B_seq - the sequential matrix generated 5636 5637 Level: developer 5638 5639 @*/ 5640 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5641 { 5642 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5643 PetscErrorCode ierr; 5644 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5645 IS isrowb,iscolb; 5646 Mat *bseq=NULL; 5647 5648 PetscFunctionBegin; 5649 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5650 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5651 } 5652 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5653 5654 if (scall == MAT_INITIAL_MATRIX) { 5655 start = A->cmap->rstart; 5656 cmap = a->garray; 5657 nzA = a->A->cmap->n; 5658 nzB = a->B->cmap->n; 5659 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5660 ncols = 0; 5661 for (i=0; i<nzB; i++) { /* row < local row index */ 5662 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5663 else break; 5664 } 5665 imark = i; 5666 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5667 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5668 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5669 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5670 } else { 5671 PetscCheckFalse(!rowb || !colb,PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5672 isrowb = *rowb; iscolb = *colb; 5673 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5674 bseq[0] = *B_seq; 5675 } 5676 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5677 *B_seq = bseq[0]; 5678 ierr = PetscFree(bseq);CHKERRQ(ierr); 5679 if (!rowb) { 5680 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5681 } else { 5682 *rowb = isrowb; 5683 } 5684 if (!colb) { 5685 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5686 } else { 5687 *colb = iscolb; 5688 } 5689 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5690 PetscFunctionReturn(0); 5691 } 5692 5693 /* 5694 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5695 of the OFF-DIAGONAL portion of local A 5696 5697 Collective on Mat 5698 5699 Input Parameters: 5700 + A,B - the matrices in mpiaij format 5701 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5702 5703 Output Parameter: 5704 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5705 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5706 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5707 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5708 5709 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5710 for this matrix. This is not desirable.. 
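
   A minimal calling sketch (illustrative only; it assumes the caller has declared a PetscErrorCode ierr
   and keeps startsj_s, startsj_r and bufa alive between calls so that the MAT_REUSE_MATRIX path can reuse them):

      PetscInt  *startsj_s = NULL,*startsj_r = NULL;
      MatScalar *bufa      = NULL;
      Mat        B_oth;
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
         ... change the numerical values of B, keeping its nonzero pattern ...
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);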
5711 5712 Level: developer 5713 5714 */ 5715 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5716 { 5717 PetscErrorCode ierr; 5718 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5719 Mat_SeqAIJ *b_oth; 5720 VecScatter ctx; 5721 MPI_Comm comm; 5722 const PetscMPIInt *rprocs,*sprocs; 5723 const PetscInt *srow,*rstarts,*sstarts; 5724 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5725 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len; 5726 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5727 MPI_Request *reqs = NULL,*rwaits = NULL,*swaits = NULL; 5728 PetscMPIInt size,tag,rank,nreqs; 5729 5730 PetscFunctionBegin; 5731 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5732 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 5733 5734 if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) { 5735 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5736 } 5737 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5738 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 5739 5740 if (size == 1) { 5741 startsj_s = NULL; 5742 bufa_ptr = NULL; 5743 *B_oth = NULL; 5744 PetscFunctionReturn(0); 5745 } 5746 5747 ctx = a->Mvctx; 5748 tag = ((PetscObject)ctx)->tag; 5749 5750 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5751 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5752 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5753 ierr = PetscMPIIntCast(nsends+nrecvs,&nreqs);CHKERRQ(ierr); 5754 ierr = PetscMalloc1(nreqs,&reqs);CHKERRQ(ierr); 5755 rwaits = reqs; 5756 swaits = reqs + nrecvs; 5757 5758 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5759 if (scall == MAT_INITIAL_MATRIX) { 5760 /* i-array */ 5761 /*---------*/ 5762 /* post receives */ 5763 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */ 5764 for (i=0; i<nrecvs; i++) { 5765 rowlen = rvalues + rstarts[i]*rbs; 5766 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5767 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5768 } 5769 5770 /* pack the outgoing message */ 5771 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5772 5773 sstartsj[0] = 0; 5774 rstartsj[0] = 0; 5775 len = 0; /* total length of j or a array to be sent */ 5776 if (nsends) { 5777 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5778 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5779 } 5780 for (i=0; i<nsends; i++) { 5781 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5782 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5783 for (j=0; j<nrows; j++) { 5784 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5785 for (l=0; l<sbs; l++) { 5786 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5787 5788 rowlen[j*sbs+l] = ncols; 5789 5790 len 
+= ncols; 5791 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5792 } 5793 k++; 5794 } 5795 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5796 5797 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5798 } 5799 /* recvs and sends of i-array are completed */ 5800 if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5801 ierr = PetscFree(svalues);CHKERRQ(ierr); 5802 5803 /* allocate buffers for sending j and a arrays */ 5804 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5805 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5806 5807 /* create i-array of B_oth */ 5808 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5809 5810 b_othi[0] = 0; 5811 len = 0; /* total length of j or a array to be received */ 5812 k = 0; 5813 for (i=0; i<nrecvs; i++) { 5814 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5815 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5816 for (j=0; j<nrows; j++) { 5817 b_othi[k+1] = b_othi[k] + rowlen[j]; 5818 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5819 k++; 5820 } 5821 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5822 } 5823 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5824 5825 /* allocate space for j and a arrrays of B_oth */ 5826 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5827 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5828 5829 /* j-array */ 5830 /*---------*/ 5831 /* post receives of j-array */ 5832 for (i=0; i<nrecvs; i++) { 5833 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5834 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5835 } 5836 5837 /* pack the outgoing message j-array */ 5838 if (nsends) k = sstarts[0]; 5839 for (i=0; i<nsends; i++) { 5840 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5841 bufJ = bufj+sstartsj[i]; 5842 for (j=0; j<nrows; j++) { 5843 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5844 for (ll=0; ll<sbs; ll++) { 5845 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5846 for (l=0; l<ncols; l++) { 5847 *bufJ++ = cols[l]; 5848 } 5849 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5850 } 5851 } 5852 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5853 } 5854 5855 /* recvs and sends of j-array are completed */ 5856 if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5857 } else if (scall == MAT_REUSE_MATRIX) { 5858 sstartsj = *startsj_s; 5859 rstartsj = *startsj_r; 5860 bufa = *bufa_ptr; 5861 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5862 ierr = MatSeqAIJGetArrayWrite(*B_oth,&b_otha);CHKERRQ(ierr); 5863 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5864 5865 /* a-array */ 5866 /*---------*/ 5867 /* post receives of a-array */ 5868 for (i=0; i<nrecvs; i++) { 5869 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5870 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5871 } 5872 5873 /* pack the outgoing message a-array */ 5874 if (nsends) k = sstarts[0]; 5875 for (i=0; i<nsends; i++) { 5876 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5877 bufA = bufa+sstartsj[i]; 5878 for (j=0; j<nrows; j++) { 5879 row = srow[k++] + B->rmap->range[rank]; /* global row 
idx */ 5880 for (ll=0; ll<sbs; ll++) { 5881 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5882 for (l=0; l<ncols; l++) { 5883 *bufA++ = vals[l]; 5884 } 5885 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5886 } 5887 } 5888 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5889 } 5890 /* recvs and sends of a-array are completed */ 5891 if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5892 ierr = PetscFree(reqs);CHKERRQ(ierr); 5893 5894 if (scall == MAT_INITIAL_MATRIX) { 5895 /* put together the new matrix */ 5896 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5897 5898 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5899 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5900 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5901 b_oth->free_a = PETSC_TRUE; 5902 b_oth->free_ij = PETSC_TRUE; 5903 b_oth->nonew = 0; 5904 5905 ierr = PetscFree(bufj);CHKERRQ(ierr); 5906 if (!startsj_s || !bufa_ptr) { 5907 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5908 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5909 } else { 5910 *startsj_s = sstartsj; 5911 *startsj_r = rstartsj; 5912 *bufa_ptr = bufa; 5913 } 5914 } else if (scall == MAT_REUSE_MATRIX) { 5915 ierr = MatSeqAIJRestoreArrayWrite(*B_oth,&b_otha);CHKERRQ(ierr); 5916 } 5917 5918 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5919 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 5920 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5921 PetscFunctionReturn(0); 5922 } 5923 5924 /*@C 5925 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5926 5927 Not Collective 5928 5929 Input Parameter: 5930 . A - The matrix in mpiaij format 5931 5932 Output Parameters: 5933 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5934 . 
colmap - A map from global column index to local index into lvec 5935 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5936 5937 Level: developer 5938 5939 @*/ 5940 #if defined(PETSC_USE_CTABLE) 5941 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5942 #else 5943 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5944 #endif 5945 { 5946 Mat_MPIAIJ *a; 5947 5948 PetscFunctionBegin; 5949 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5950 PetscValidPointer(lvec, 2); 5951 PetscValidPointer(colmap, 3); 5952 PetscValidPointer(multScatter, 4); 5953 a = (Mat_MPIAIJ*) A->data; 5954 if (lvec) *lvec = a->lvec; 5955 if (colmap) *colmap = a->colmap; 5956 if (multScatter) *multScatter = a->Mvctx; 5957 PetscFunctionReturn(0); 5958 } 5959 5960 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5961 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5962 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5963 #if defined(PETSC_HAVE_MKL_SPARSE) 5964 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5965 #endif 5966 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*); 5967 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5968 #if defined(PETSC_HAVE_ELEMENTAL) 5969 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5970 #endif 5971 #if defined(PETSC_HAVE_SCALAPACK) 5972 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*); 5973 #endif 5974 #if defined(PETSC_HAVE_HYPRE) 5975 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5976 #endif 5977 #if defined(PETSC_HAVE_CUDA) 5978 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*); 5979 #endif 5980 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 5981 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*); 5982 #endif 5983 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5984 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5985 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 5986 5987 /* 5988 Computes (B'*A')' since computing B*A directly is untenable 5989 5990 n p p 5991 [ ] [ ] [ ] 5992 m [ A ] * n [ B ] = m [ C ] 5993 [ ] [ ] [ ] 5994 5995 */ 5996 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5997 { 5998 PetscErrorCode ierr; 5999 Mat At,Bt,Ct; 6000 6001 PetscFunctionBegin; 6002 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 6003 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 6004 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr); 6005 ierr = MatDestroy(&At);CHKERRQ(ierr); 6006 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 6007 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 6008 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 6009 PetscFunctionReturn(0); 6010 } 6011 6012 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C) 6013 { 6014 PetscErrorCode ierr; 6015 PetscBool cisdense; 6016 6017 PetscFunctionBegin; 6018 PetscCheckFalse(A->cmap->n != B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT,A->cmap->n,B->rmap->n); 6019 ierr = 
MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
6020 ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
6021 ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
6022 if (!cisdense) {
6023 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
6024 }
6025 ierr = MatSetUp(C);CHKERRQ(ierr);
6026
6027 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6028 PetscFunctionReturn(0);
6029 }
6030
6031 /* ----------------------------------------------------------------*/
6032 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6033 {
6034 Mat_Product *product = C->product;
6035 Mat A = product->A,B=product->B;
6036
6037 PetscFunctionBegin;
6038 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
6039 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
6040
6041 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6042 C->ops->productsymbolic = MatProductSymbolic_AB;
6043 PetscFunctionReturn(0);
6044 }
6045
6046 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6047 {
6048 PetscErrorCode ierr;
6049 Mat_Product *product = C->product;
6050
6051 PetscFunctionBegin;
6052 if (product->type == MATPRODUCT_AB) {
6053 ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
6054 }
6055 PetscFunctionReturn(0);
6056 }
6057
6058 /* std::upper_bound(): Given a sorted array, return index of the first element in range [first,last) whose value
6059 is greater than value, or last if there is no such element.
6060 */
6061 static inline PetscErrorCode PetscSortedIntUpperBound(PetscInt *array,PetscCount first,PetscCount last,PetscInt value,PetscCount *upper)
6062 {
6063 PetscCount it,step,count = last - first;
6064
6065 PetscFunctionBegin;
6066 while (count > 0) {
6067 it = first;
6068 step = count / 2;
6069 it += step;
6070 if (!(value < array[it])) {
6071 first = ++it;
6072 count -= step + 1;
6073 } else count = step;
6074 }
6075 *upper = first;
6076 PetscFunctionReturn(0);
6077 }
6078
6079 /* Merge two sets of sorted nonzero entries and return a CSR for the merged (sequential) matrix
6080
6081 Input Parameters:
6082
6083 j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1)
6084 j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2)
6085
6086 mat: both sets' entries are on m rows, where m is the number of local rows of the matrix mat
6087
6088 For Set1, j1[] contains column indices of the nonzeros.
6089 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
6090 respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6091 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6092
6093 Similar for Set2.
6094
6095 This routine merges the two sets of nonzeros row by row and removes repeats.
6096
6097 Output Parameters: (memory is allocated by the caller)
6098
6099 i[],j[]: the CSR of the merged matrix, which has m rows.
6100 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to the imap1[k]-th unique nonzero in the merged matrix.
6101 imap2[]: similar to imap1[], but for Set2.
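
   A tiny worked example (hypothetical data, one local row): if the unique columns of Set1 in that row are
   {2, 5} and the unique columns of Set2 are {5, 7}, the merged row has j[] = {2, 5, 7} and i[] = {0, 3},
   with imap1[] = {0, 1} (Set1's unique nonzeros land at merged positions 0 and 1) and imap2[] = {1, 2}.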
6102 Note we order nonzeros row-by-row and from left to right.
6103 */
6104 static PetscErrorCode MatMergeEntries_Internal(Mat mat,const PetscInt j1[],const PetscInt j2[],const PetscCount rowBegin1[],const PetscCount rowEnd1[],
6105 const PetscCount rowBegin2[],const PetscCount rowEnd2[],const PetscCount jmap1[],const PetscCount jmap2[],
6106 PetscCount imap1[],PetscCount imap2[],PetscInt i[],PetscInt j[])
6107 {
6108 PetscErrorCode ierr;
6109 PetscInt r,m; /* Row index of mat */
6110 PetscCount t,t1,t2,b1,e1,b2,e2;
6111
6112 PetscFunctionBegin;
6113 ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
6114 t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged set, respectively */
6115 i[0] = 0;
6116 for (r=0; r<m; r++) { /* Do row by row merging */
6117 b1 = rowBegin1[r];
6118 e1 = rowEnd1[r];
6119 b2 = rowBegin2[r];
6120 e2 = rowEnd2[r];
6121 while (b1 < e1 && b2 < e2) {
6122 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
6123 j[t] = j1[b1];
6124 imap1[t1] = t;
6125 imap2[t2] = t;
6126 b1 += jmap1[t1+1] - jmap1[t1]; /* Jump to next unique local nonzero */
6127 b2 += jmap2[t2+1] - jmap2[t2]; /* Jump to next unique remote nonzero */
6128 t1++; t2++; t++;
6129 } else if (j1[b1] < j2[b2]) {
6130 j[t] = j1[b1];
6131 imap1[t1] = t;
6132 b1 += jmap1[t1+1] - jmap1[t1];
6133 t1++; t++;
6134 } else {
6135 j[t] = j2[b2];
6136 imap2[t2] = t;
6137 b2 += jmap2[t2+1] - jmap2[t2];
6138 t2++; t++;
6139 }
6140 }
6141 /* Merge the remaining in either j1[] or j2[] */
6142 while (b1 < e1) {
6143 j[t] = j1[b1];
6144 imap1[t1] = t;
6145 b1 += jmap1[t1+1] - jmap1[t1];
6146 t1++; t++;
6147 }
6148 while (b2 < e2) {
6149 j[t] = j2[b2];
6150 imap2[t2] = t;
6151 b2 += jmap2[t2+1] - jmap2[t2];
6152 t2++; t++;
6153 }
6154 i[r+1] = t;
6155 }
6156 PetscFunctionReturn(0);
6157 }
6158
6159 /* Split a set/group of local entries into two subsets: those in the diagonal block and those in the off-diagonal block
6160
6161 Input Parameters:
6162 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6163 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6164 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6165
6166 i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6167 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6168
6169 Output Parameters:
6170 j[],perm[]: the routine sorts j[] within each row, permuting perm[] along with it.
6171 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6172 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6173 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6174
6175 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6176 Aperm[] has length Atot and stores the values from perm[] for entries belonging to the diagonal block; Atot counts all such entries,
6177 including repeats (i.e., entries with the same 'i,j' pair).
6178 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6179 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
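   For example (hypothetical data, a single local row): if the diagonal-block entries of that row have column
   indices {3, 3, 7}, then Atot = 3, Annz = 2 and Ajmap = {0, 2, 3} (column 3 is repeated twice, column 7 once).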
6180
6181 Atot: number of entries belonging to the diagonal block
6182 Annz: number of unique nonzeros belonging to the diagonal block.
6183
6184 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6185
6186 Aperm[],Bperm[],Ajmap[],Bjmap[] are allocated by this routine with PetscMalloc4(). One has to free them with PetscFree4() in the exact order.
6187 */
6188 static PetscErrorCode MatSplitEntries_Internal(Mat mat,PetscCount n,const PetscInt i[],PetscInt j[],
6189 PetscCount perm[],PetscCount rowBegin[],PetscCount rowMid[],PetscCount rowEnd[],
6190 PetscCount *Atot_,PetscCount **Aperm_,PetscCount *Annz_,PetscCount **Ajmap_,
6191 PetscCount *Btot_,PetscCount **Bperm_,PetscCount *Bnnz_,PetscCount **Bjmap_)
6192 {
6193 PetscErrorCode ierr;
6194 PetscInt cstart,cend,rstart,rend,row,col;
6195 PetscCount Atot=0,Btot=0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
6196 PetscCount Annz=0,Bnnz=0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
6197 PetscCount k,m,p,q,r,s,mid;
6198 PetscCount *Aperm,*Bperm,*Ajmap,*Bjmap;
6199
6200 PetscFunctionBegin;
6201 ierr = PetscLayoutGetRange(mat->rmap,&rstart,&rend);CHKERRQ(ierr);
6202 ierr = PetscLayoutGetRange(mat->cmap,&cstart,&cend);CHKERRQ(ierr);
6203 m = rend - rstart;
6204
6205 for (k=0; k<n; k++) {if (i[k]>=0) break;} /* Skip negative rows */
6206
6207 /* Process [k,n): sort and partition each local row into diag and offdiag portions,
6208 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
6209 */
6210 while (k<n) {
6211 row = i[k];
6212 /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
6213 for (s=k; s<n; s++) if (i[s] != row) break;
6214 for (p=k; p<s; p++) {
6215 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */
6216 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index %" PetscInt_FMT " is out of range",j[p]);
6217 }
6218 ierr = PetscSortIntWithCountArray(s-k,j+k,perm+k);CHKERRQ(ierr);
6219 ierr = PetscSortedIntUpperBound(j,k,s,-1,&mid);CHKERRQ(ierr); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
6220 rowBegin[row-rstart] = k;
6221 rowMid[row-rstart] = mid;
6222 rowEnd[row-rstart] = s;
6223
6224 /* Count nonzeros of this diag/offdiag row, which might have repeats */
6225 Atot += mid - k;
6226 Btot += s - mid;
6227
6228 /* Count unique nonzeros of this diag/offdiag row */
6229 for (p=k; p<mid;) {
6230 col = j[p];
6231 do {j[p] += PETSC_MAX_INT; p++;} while (p<mid && j[p] == col); /* Revert the modified diagonal indices */
6232 Annz++;
6233 }
6234
6235 for (p=mid; p<s;) {
6236 col = j[p];
6237 do {p++;} while (p<s && j[p] == col);
6238 Bnnz++;
6239 }
6240 k = s;
6241 }
6242
6243 /* Allocation according to Atot, Btot, Annz, Bnnz */
6244 ierr = PetscMalloc4(Atot,&Aperm,Btot,&Bperm,Annz+1,&Ajmap,Bnnz+1,&Bjmap);CHKERRQ(ierr);
6245
6246 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6247 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6248 for (r=0; r<m; r++) {
6249 k = rowBegin[r];
6250 mid = rowMid[r];
6251 s = rowEnd[r];
6252 ierr = PetscArraycpy(Aperm+Atot,perm+k, mid-k);CHKERRQ(ierr);
6253 ierr = PetscArraycpy(Bperm+Btot,perm+mid,s-mid);CHKERRQ(ierr);
6254 Atot += mid - k;
6255 Btot += s - mid;
6256
6257 /* Scan column indices in this row and find out how many repeats each
unique nonzero has */ 6258 for (p=k; p<mid;) { 6259 col = j[p]; 6260 q = p; 6261 do {p++;} while (p<mid && j[p] == col); 6262 Ajmap[Annz+1] = Ajmap[Annz] + (p - q); 6263 Annz++; 6264 } 6265 6266 for (p=mid; p<s;) { 6267 col = j[p]; 6268 q = p; 6269 do {p++;} while (p<s && j[p] == col); 6270 Bjmap[Bnnz+1] = Bjmap[Bnnz] + (p - q); 6271 Bnnz++; 6272 } 6273 } 6274 /* Output */ 6275 *Aperm_ = Aperm; 6276 *Annz_ = Annz; 6277 *Atot_ = Atot; 6278 *Ajmap_ = Ajmap; 6279 *Bperm_ = Bperm; 6280 *Bnnz_ = Bnnz; 6281 *Btot_ = Btot; 6282 *Bjmap_ = Bjmap; 6283 PetscFunctionReturn(0); 6284 } 6285 6286 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, const PetscInt coo_i[], const PetscInt coo_j[]) 6287 { 6288 PetscErrorCode ierr; 6289 MPI_Comm comm; 6290 PetscMPIInt rank,size; 6291 PetscInt m,n,M,N,rstart,rend,cstart,cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6292 PetscCount k,p,q,rem; /* Loop variables over coo arrays */ 6293 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 6294 6295 PetscFunctionBegin; 6296 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 6297 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 6298 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 6299 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 6300 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 6301 ierr = PetscLayoutGetRange(mat->rmap,&rstart,&rend);CHKERRQ(ierr); 6302 ierr = PetscLayoutGetRange(mat->cmap,&cstart,&cend);CHKERRQ(ierr); 6303 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 6304 ierr = MatGetSize(mat,&M,&N);CHKERRQ(ierr); 6305 6306 /* ---------------------------------------------------------------------------*/ 6307 /* Sort (i,j) by row along with a permuation array, so that the to-be-ignored */ 6308 /* entries come first, then local rows, then remote rows. */ 6309 /* ---------------------------------------------------------------------------*/ 6310 PetscCount n1 = coo_n,*perm1; 6311 PetscInt *i1,*j1; /* Copies of input COOs along with a permutation array */ 6312 ierr = PetscMalloc3(n1,&i1,n1,&j1,n1,&perm1);CHKERRQ(ierr); 6313 ierr = PetscArraycpy(i1,coo_i,n1);CHKERRQ(ierr); /* Make a copy since we'll modify it */ 6314 ierr = PetscArraycpy(j1,coo_j,n1);CHKERRQ(ierr); 6315 for (k=0; k<n1; k++) perm1[k] = k; 6316 6317 /* Manipulate indices so that entries with negative row or col indices will have smallest 6318 row indices, local entries will have greater but negative row indices, and remote entries 6319 will have positive row indices. 
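   For instance (hypothetical values): with rstart = 100 and rend = 200, an entry in local row 150 is shifted to
   150 - PETSC_MAX_INT (negative but larger than PETSC_MIN_INT), an entry with a negative row or column index is
   mapped to PETSC_MIN_INT, and an off-process entry in row 300 keeps its positive row index (unless donotstash
   is set, in which case it is also dropped to PETSC_MIN_INT). Sorting by row then places ignored entries first,
   local rows next and remote rows last.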
6320 */
6321 for (k=0; k<n1; k++) {
6322 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */
6323 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6324 else {
      PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_USER_INPUT,"MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6325   if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
      }
6326 }
6327
6328 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6329 ierr = PetscSortIntWithIntCountArrayPair(n1,i1,j1,perm1);CHKERRQ(ierr);
6330 for (k=0; k<n1; k++) {if (i1[k] > PETSC_MIN_INT) break;} /* Advance k to the first entry we need to take care of */
6331 ierr = PetscSortedIntUpperBound(i1,k,n1,rend-1-PETSC_MAX_INT,&rem);CHKERRQ(ierr); /* rem is upper bound of the last local row */
6332 for (; k<rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows */
6333
6334 /* ---------------------------------------------------------------------------*/
6335 /* Split local rows into diag/offdiag portions */
6336 /* ---------------------------------------------------------------------------*/
6337 PetscCount *rowBegin1,*rowMid1,*rowEnd1;
6338 PetscCount *Ajmap1,*Aperm1,*Bjmap1,*Bperm1,*Cperm1;
6339 PetscCount Annz1,Bnnz1,Atot1,Btot1;
6340
6341 ierr = PetscCalloc3(m,&rowBegin1,m,&rowMid1,m,&rowEnd1);CHKERRQ(ierr);
6342 ierr = PetscMalloc1(n1-rem,&Cperm1);CHKERRQ(ierr);
6343 ierr = MatSplitEntries_Internal(mat,rem,i1,j1,perm1,rowBegin1,rowMid1,rowEnd1,&Atot1,&Aperm1,&Annz1,&Ajmap1,&Btot1,&Bperm1,&Bnnz1,&Bjmap1);CHKERRQ(ierr);
6344
6345 /* ---------------------------------------------------------------------------*/
6346 /* Send remote rows to their owner */
6347 /* ---------------------------------------------------------------------------*/
6348 /* Find which rows should be sent to which remote ranks */
6349 PetscInt nsend = 0; /* Number of MPI ranks to send data to */
6350 PetscMPIInt *sendto; /* [nsend], storing remote ranks */
6351 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6352 const PetscInt *ranges;
6353 PetscInt maxNsend = size >= 128? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6354
6355 ierr = PetscLayoutGetRanges(mat->rmap,&ranges);CHKERRQ(ierr);
6356 ierr = PetscMalloc2(maxNsend,&sendto,maxNsend,&nentries);CHKERRQ(ierr);
6357 for (k=rem; k<n1;) {
6358 PetscMPIInt owner;
6359 PetscInt firstRow,lastRow;
6360 /* Locate a row range */
6361 firstRow = i1[k]; /* first row of this owner */
6362 ierr = PetscLayoutFindOwner(mat->rmap,firstRow,&owner);CHKERRQ(ierr);
6363 lastRow = ranges[owner+1]-1; /* last row of this owner */
6364
6365 /* Find the first index 'p' in [k,n1) with i1[p] belonging to the next owner */
6366 ierr = PetscSortedIntUpperBound(i1,k,n1,lastRow,&p);CHKERRQ(ierr);
6367
6368 /* All entries in [k,p) belong to this remote owner */
6369 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6370 PetscMPIInt *sendto2;
6371 PetscInt *nentries2;
6372 PetscInt maxNsend2 = (maxNsend <= size/2) ?
maxNsend*2 : size;
6373 ierr = PetscMalloc2(maxNsend2,&sendto2,maxNsend2,&nentries2);CHKERRQ(ierr);
6374 ierr = PetscArraycpy(sendto2,sendto,maxNsend);CHKERRQ(ierr);
6375 ierr = PetscArraycpy(nentries2,nentries,maxNsend);CHKERRQ(ierr);
6376 ierr = PetscFree2(sendto,nentries);CHKERRQ(ierr);
6377 sendto = sendto2;
6378 nentries = nentries2;
6379 maxNsend = maxNsend2;
6380 }
6381 sendto[nsend] = owner;
6382 nentries[nsend] = p - k;
6383 ierr = PetscCountCast(p-k,&nentries[nsend]);CHKERRQ(ierr);
6384 nsend++;
6385 k = p;
6386 }
6387
6388 /* Build 1st SF to know offsets on remote to send data */
6389 PetscSF sf1;
6390 PetscInt nroots = 1,nroots2 = 0;
6391 PetscInt nleaves = nsend,nleaves2 = 0;
6392 PetscInt *offsets;
6393 PetscSFNode *iremote;
6394
6395 ierr = PetscSFCreate(comm,&sf1);CHKERRQ(ierr);
6396 ierr = PetscMalloc1(nsend,&iremote);CHKERRQ(ierr);
6397 ierr = PetscMalloc1(nsend,&offsets);CHKERRQ(ierr);
6398 for (k=0; k<nsend; k++) {
6399 iremote[k].rank = sendto[k];
6400 iremote[k].index = 0;
6401 nleaves2 += nentries[k];
6402 PetscCheck(nleaves2 >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF leaves is too large for PetscInt");
6403 }
6404 ierr = PetscSFSetGraph(sf1,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
6405 ierr = PetscSFFetchAndOpWithMemTypeBegin(sf1,MPIU_INT,PETSC_MEMTYPE_HOST,&nroots2/*rootdata*/,PETSC_MEMTYPE_HOST,nentries/*leafdata*/,PETSC_MEMTYPE_HOST,offsets/*leafupdate*/,MPI_SUM);CHKERRQ(ierr);
6406 ierr = PetscSFFetchAndOpEnd(sf1,MPIU_INT,&nroots2,nentries,offsets,MPI_SUM);CHKERRQ(ierr); /* Should nroots2 overflow, we check offsets[] below */
6407 ierr = PetscSFDestroy(&sf1);CHKERRQ(ierr);
6408 PetscAssert(nleaves2 == n1-rem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT,nleaves2,n1-rem);
6409
6410 /* Build 2nd SF to send remote COOs to their owner */
6411 PetscSF sf2;
6412 nroots = nroots2;
6413 nleaves = nleaves2;
6414 ierr = PetscSFCreate(comm,&sf2);CHKERRQ(ierr);
6415 ierr = PetscSFSetFromOptions(sf2);CHKERRQ(ierr);
6416 ierr = PetscMalloc1(nleaves,&iremote);CHKERRQ(ierr);
6417 p = 0;
6418 for (k=0; k<nsend; k++) {
6419 PetscCheck(offsets[k] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF roots is too large for PetscInt");
6420 for (q=0; q<nentries[k]; q++,p++) {
6421 iremote[p].rank = sendto[k];
6422 iremote[p].index = offsets[k] + q;
6423 }
6424 }
6425 ierr = PetscSFSetGraph(sf2,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
6426
6427 /* sf2 only sends contiguous leafdata to contiguous rootdata.
We record the permuation which will be used to fill leafdata */ 6428 ierr = PetscArraycpy(Cperm1,perm1+rem,n1-rem);CHKERRQ(ierr); 6429 6430 /* Send the remote COOs to their owner */ 6431 PetscInt n2 = nroots,*i2,*j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6432 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6433 ierr = PetscMalloc3(n2,&i2,n2,&j2,n2,&perm2);CHKERRQ(ierr); 6434 ierr = PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,i1+rem,PETSC_MEMTYPE_HOST,i2,MPI_REPLACE);CHKERRQ(ierr); 6435 ierr = PetscSFReduceEnd(sf2,MPIU_INT,i1+rem,i2,MPI_REPLACE);CHKERRQ(ierr); 6436 ierr = PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,j1+rem,PETSC_MEMTYPE_HOST,j2,MPI_REPLACE);CHKERRQ(ierr); 6437 ierr = PetscSFReduceEnd(sf2,MPIU_INT,j1+rem,j2,MPI_REPLACE);CHKERRQ(ierr); 6438 6439 ierr = PetscFree(offsets);CHKERRQ(ierr); 6440 ierr = PetscFree2(sendto,nentries);CHKERRQ(ierr); 6441 6442 /* ---------------------------------------------------------------*/ 6443 /* Sort received COOs by row along with the permutation array */ 6444 /* ---------------------------------------------------------------*/ 6445 for (k=0; k<n2; k++) perm2[k] = k; 6446 ierr = PetscSortIntWithIntCountArrayPair(n2,i2,j2,perm2);CHKERRQ(ierr); 6447 6448 /* ---------------------------------------------------------------*/ 6449 /* Split received COOs into diag/offdiag portions */ 6450 /* ---------------------------------------------------------------*/ 6451 PetscCount *rowBegin2,*rowMid2,*rowEnd2; 6452 PetscCount *Ajmap2,*Aperm2,*Bjmap2,*Bperm2; 6453 PetscCount Annz2,Bnnz2,Atot2,Btot2; 6454 6455 ierr = PetscCalloc3(m,&rowBegin2,m,&rowMid2,m,&rowEnd2);CHKERRQ(ierr); 6456 ierr = MatSplitEntries_Internal(mat,n2,i2,j2,perm2,rowBegin2,rowMid2,rowEnd2,&Atot2,&Aperm2,&Annz2,&Ajmap2,&Btot2,&Bperm2,&Bnnz2,&Bjmap2);CHKERRQ(ierr); 6457 6458 /* --------------------------------------------------------------------------*/ 6459 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6460 /* --------------------------------------------------------------------------*/ 6461 PetscInt *Ai,*Bi; 6462 PetscInt *Aj,*Bj; 6463 6464 ierr = PetscMalloc1(m+1,&Ai);CHKERRQ(ierr); 6465 ierr = PetscMalloc1(m+1,&Bi);CHKERRQ(ierr); 6466 ierr = PetscMalloc1(Annz1+Annz2,&Aj);CHKERRQ(ierr); /* Since local and remote entries might have dups, we might allocate excess memory */ 6467 ierr = PetscMalloc1(Bnnz1+Bnnz2,&Bj);CHKERRQ(ierr); 6468 6469 PetscCount *Aimap1,*Bimap1,*Aimap2,*Bimap2; 6470 ierr = PetscMalloc4(Annz1,&Aimap1,Bnnz1,&Bimap1,Annz2,&Aimap2,Bnnz2,&Bimap2);CHKERRQ(ierr); 6471 6472 ierr = MatMergeEntries_Internal(mat,j1,j2,rowBegin1,rowMid1,rowBegin2,rowMid2,Ajmap1,Ajmap2,Aimap1,Aimap2,Ai,Aj);CHKERRQ(ierr); 6473 ierr = MatMergeEntries_Internal(mat,j1,j2,rowMid1, rowEnd1,rowMid2, rowEnd2,Bjmap1,Bjmap2,Bimap1,Bimap2,Bi,Bj);CHKERRQ(ierr); 6474 ierr = PetscFree3(rowBegin1,rowMid1,rowEnd1);CHKERRQ(ierr); 6475 ierr = PetscFree3(rowBegin2,rowMid2,rowEnd2);CHKERRQ(ierr); 6476 ierr = PetscFree3(i1,j1,perm1);CHKERRQ(ierr); 6477 ierr = PetscFree3(i2,j2,perm2);CHKERRQ(ierr); 6478 6479 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6480 PetscInt Annz = Ai[m]; 6481 PetscInt Bnnz = Bi[m]; 6482 if (Annz < Annz1 + Annz2) { 6483 PetscInt *Aj_new; 6484 ierr = PetscMalloc1(Annz,&Aj_new);CHKERRQ(ierr); 6485 ierr = PetscArraycpy(Aj_new,Aj,Annz);CHKERRQ(ierr); 6486 ierr = 
PetscFree(Aj);CHKERRQ(ierr); 6487 Aj = Aj_new; 6488 } 6489 6490 if (Bnnz < Bnnz1 + Bnnz2) { 6491 PetscInt *Bj_new; 6492 ierr = PetscMalloc1(Bnnz,&Bj_new);CHKERRQ(ierr); 6493 ierr = PetscArraycpy(Bj_new,Bj,Bnnz);CHKERRQ(ierr); 6494 ierr = PetscFree(Bj);CHKERRQ(ierr); 6495 Bj = Bj_new; 6496 } 6497 6498 /* --------------------------------------------------------------------------------*/ 6499 /* Create a MPIAIJKOKKOS newmat with CSRs of A and B, then replace mat with newmat */ 6500 /* --------------------------------------------------------------------------------*/ 6501 Mat newmat; 6502 PetscScalar *Aa,*Ba; 6503 Mat_SeqAIJ *a,*b; 6504 6505 ierr = PetscCalloc1(Annz,&Aa);CHKERRQ(ierr); /* Zero matrix on device */ 6506 ierr = PetscCalloc1(Bnnz,&Ba);CHKERRQ(ierr); 6507 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6508 if (cstart) {for (k=0; k<Annz; k++) Aj[k] -= cstart;} 6509 ierr = MatCreateMPIAIJWithSplitArrays(comm,m,n,M,N,Ai,Aj,Aa,Bi,Bj,Ba,&newmat);CHKERRQ(ierr); /* FIXME: Can we do it without creating a new mat? */ 6510 ierr = MatHeaderMerge(mat,&newmat);CHKERRQ(ierr); /* Unlike MatHeaderReplace(), some info, ex. mat->product is kept */ 6511 mpiaij = (Mat_MPIAIJ*)mat->data; 6512 a = (Mat_SeqAIJ*)mpiaij->A->data; 6513 b = (Mat_SeqAIJ*)mpiaij->B->data; 6514 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6515 a->free_a = b->free_a = PETSC_TRUE; 6516 a->free_ij = b->free_ij = PETSC_TRUE; 6517 6518 mpiaij->coo_n = coo_n; 6519 mpiaij->coo_sf = sf2; 6520 mpiaij->sendlen = nleaves; 6521 mpiaij->recvlen = nroots; 6522 6523 mpiaij->Annz1 = Annz1; 6524 mpiaij->Annz2 = Annz2; 6525 mpiaij->Bnnz1 = Bnnz1; 6526 mpiaij->Bnnz2 = Bnnz2; 6527 6528 mpiaij->Atot1 = Atot1; 6529 mpiaij->Atot2 = Atot2; 6530 mpiaij->Btot1 = Btot1; 6531 mpiaij->Btot2 = Btot2; 6532 6533 mpiaij->Aimap1 = Aimap1; 6534 mpiaij->Aimap2 = Aimap2; 6535 mpiaij->Bimap1 = Bimap1; 6536 mpiaij->Bimap2 = Bimap2; 6537 6538 mpiaij->Ajmap1 = Ajmap1; 6539 mpiaij->Ajmap2 = Ajmap2; 6540 mpiaij->Bjmap1 = Bjmap1; 6541 mpiaij->Bjmap2 = Bjmap2; 6542 6543 mpiaij->Aperm1 = Aperm1; 6544 mpiaij->Aperm2 = Aperm2; 6545 mpiaij->Bperm1 = Bperm1; 6546 mpiaij->Bperm2 = Bperm2; 6547 6548 mpiaij->Cperm1 = Cperm1; 6549 6550 /* Allocate in preallocation. 
If not used, it has zero cost on host */ 6551 ierr = PetscMalloc2(mpiaij->sendlen,&mpiaij->sendbuf,mpiaij->recvlen,&mpiaij->recvbuf);CHKERRQ(ierr); 6552 PetscFunctionReturn(0); 6553 } 6554 6555 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat,const PetscScalar v[],InsertMode imode) 6556 { 6557 PetscErrorCode ierr; 6558 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 6559 Mat A = mpiaij->A,B = mpiaij->B; 6560 PetscCount Annz1 = mpiaij->Annz1,Annz2 = mpiaij->Annz2,Bnnz1 = mpiaij->Bnnz1,Bnnz2 = mpiaij->Bnnz2; 6561 PetscScalar *Aa,*Ba; 6562 PetscScalar *sendbuf = mpiaij->sendbuf; 6563 PetscScalar *recvbuf = mpiaij->recvbuf; 6564 const PetscCount *Ajmap1 = mpiaij->Ajmap1,*Ajmap2 = mpiaij->Ajmap2,*Aimap1 = mpiaij->Aimap1,*Aimap2 = mpiaij->Aimap2; 6565 const PetscCount *Bjmap1 = mpiaij->Bjmap1,*Bjmap2 = mpiaij->Bjmap2,*Bimap1 = mpiaij->Bimap1,*Bimap2 = mpiaij->Bimap2; 6566 const PetscCount *Aperm1 = mpiaij->Aperm1,*Aperm2 = mpiaij->Aperm2,*Bperm1 = mpiaij->Bperm1,*Bperm2 = mpiaij->Bperm2; 6567 const PetscCount *Cperm1 = mpiaij->Cperm1; 6568 6569 PetscFunctionBegin; 6570 ierr = MatSeqAIJGetArray(A,&Aa);CHKERRQ(ierr); /* Might read and write matrix values */ 6571 ierr = MatSeqAIJGetArray(B,&Ba);CHKERRQ(ierr); 6572 if (imode == INSERT_VALUES) { 6573 ierr = PetscMemzero(Aa,((Mat_SeqAIJ*)A->data)->nz*sizeof(PetscScalar));CHKERRQ(ierr); 6574 ierr = PetscMemzero(Ba,((Mat_SeqAIJ*)B->data)->nz*sizeof(PetscScalar));CHKERRQ(ierr); 6575 } 6576 6577 /* Pack entries to be sent to remote */ 6578 for (PetscCount i=0; i<mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6579 6580 /* Send remote entries to their owner and overlap the communication with local computation */ 6581 ierr = PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf,MPIU_SCALAR,PETSC_MEMTYPE_HOST,sendbuf,PETSC_MEMTYPE_HOST,recvbuf,MPI_REPLACE);CHKERRQ(ierr); 6582 /* Add local entries to A and B */ 6583 for (PetscCount i=0; i<Annz1; i++) { 6584 for (PetscCount k=Ajmap1[i]; k<Ajmap1[i+1]; k++) Aa[Aimap1[i]] += v[Aperm1[k]]; 6585 } 6586 for (PetscCount i=0; i<Bnnz1; i++) { 6587 for (PetscCount k=Bjmap1[i]; k<Bjmap1[i+1]; k++) Ba[Bimap1[i]] += v[Bperm1[k]]; 6588 } 6589 ierr = PetscSFReduceEnd(mpiaij->coo_sf,MPIU_SCALAR,sendbuf,recvbuf,MPI_REPLACE);CHKERRQ(ierr); 6590 6591 /* Add received remote entries to A and B */ 6592 for (PetscCount i=0; i<Annz2; i++) { 6593 for (PetscCount k=Ajmap2[i]; k<Ajmap2[i+1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6594 } 6595 for (PetscCount i=0; i<Bnnz2; i++) { 6596 for (PetscCount k=Bjmap2[i]; k<Bjmap2[i+1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6597 } 6598 ierr = MatSeqAIJRestoreArray(A,&Aa);CHKERRQ(ierr); 6599 ierr = MatSeqAIJRestoreArray(B,&Ba);CHKERRQ(ierr); 6600 PetscFunctionReturn(0); 6601 } 6602 6603 /* ----------------------------------------------------------------*/ 6604 6605 /*MC 6606 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6607 6608 Options Database Keys: 6609 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 6610 6611 Level: beginner 6612 6613 Notes: 6614 MatSetValues() may be called for this matrix type with a NULL argument for the numerical values, 6615 in this case the values associated with the rows and columns one passes in are set to zero 6616 in the matrix 6617 6618 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. 
In this no 6619 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 6620 6621 .seealso: MatCreateAIJ() 6622 M*/ 6623 6624 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6625 { 6626 Mat_MPIAIJ *b; 6627 PetscErrorCode ierr; 6628 PetscMPIInt size; 6629 6630 PetscFunctionBegin; 6631 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr); 6632 6633 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 6634 B->data = (void*)b; 6635 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 6636 B->assembled = PETSC_FALSE; 6637 B->insertmode = NOT_SET_VALUES; 6638 b->size = size; 6639 6640 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr); 6641 6642 /* build cache for off array entries formed */ 6643 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 6644 6645 b->donotstash = PETSC_FALSE; 6646 b->colmap = NULL; 6647 b->garray = NULL; 6648 b->roworiented = PETSC_TRUE; 6649 6650 /* stuff used for matrix vector multiply */ 6651 b->lvec = NULL; 6652 b->Mvctx = NULL; 6653 6654 /* stuff for MatGetRow() */ 6655 b->rowindices = NULL; 6656 b->rowvalues = NULL; 6657 b->getrowactive = PETSC_FALSE; 6658 6659 /* flexible pointer used in CUSPARSE classes */ 6660 b->spptr = NULL; 6661 6662 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 6663 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 6664 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 6665 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 6666 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 6667 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 6668 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 6669 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 6670 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 6671 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr); 6672 #if defined(PETSC_HAVE_CUDA) 6673 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE);CHKERRQ(ierr); 6674 #endif 6675 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6676 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos);CHKERRQ(ierr); 6677 #endif 6678 #if defined(PETSC_HAVE_MKL_SPARSE) 6679 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 6680 #endif 6681 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 6682 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr); 6683 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 6684 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense);CHKERRQ(ierr); 6685 #if defined(PETSC_HAVE_ELEMENTAL) 6686 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 6687 #endif 6688 #if defined(PETSC_HAVE_SCALAPACK) 6689 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr); 6690 #endif 6691 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 6692 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 6693 #if defined(PETSC_HAVE_HYPRE) 6694 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 6695 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr); 6696 #endif 6697 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr); 6698 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr); 6699 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetPreallocationCOO_C",MatSetPreallocationCOO_MPIAIJ);CHKERRQ(ierr); 6700 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetValuesCOO_C",MatSetValuesCOO_MPIAIJ);CHKERRQ(ierr); 6701 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 6702 PetscFunctionReturn(0); 6703 } 6704 6705 /*@C 6706 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 6707 and "off-diagonal" part of the matrix in CSR format. 6708 6709 Collective 6710 6711 Input Parameters: 6712 + comm - MPI communicator 6713 . m - number of local rows (Cannot be PETSC_DECIDE) 6714 . n - This value should be the same as the local size used in creating the 6715 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 6716 calculated if N is given) For square matrices n is almost always m. 6717 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 6718 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 6719 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6720 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6721 . a - matrix values 6722 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6723 . oj - column indices, which must be global, representing global columns in the MPIAIJ matrix 6724 - oa - matrix values 6725 6726 Output Parameter: 6727 . mat - the matrix 6728 6729 Level: advanced 6730 6731 Notes: 6732 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6733 must free the arrays once the matrix has been destroyed and not before. 
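
   A small illustration (hypothetical values): on a rank that owns rows {0,1} and columns {0,1} of a 4x4 matrix,
   with local entries (0,0)=1.0, (0,3)=2.0 and (1,1)=3.0, the split arrays would be
      i  = {0,1,2},  j  = {0,1},  a  = {1.0,3.0}    (diagonal block, local column indices)
      oi = {0,1,1},  oj = {3},    oa = {2.0}        (off-diagonal block, global column indices)
   and the call would be MatCreateMPIAIJWithSplitArrays(comm,2,2,4,4,i,j,a,oi,oj,oa,&mat).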
6734 6735 The i and j indices are 0 based 6736 6737 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6738 6739 This sets local rows and cannot be used to set off-processor values. 6740 6741 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6742 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6743 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6744 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6745 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 6746 communication if it is known that only local entries will be set. 6747 6748 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 6749 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 6750 @*/ 6751 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 6752 { 6753 PetscErrorCode ierr; 6754 Mat_MPIAIJ *maij; 6755 6756 PetscFunctionBegin; 6757 PetscCheckFalse(m < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6758 PetscCheckFalse(i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 6759 PetscCheckFalse(oi[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 6760 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 6761 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 6762 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 6763 maij = (Mat_MPIAIJ*) (*mat)->data; 6764 6765 (*mat)->preallocated = PETSC_TRUE; 6766 6767 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 6768 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 6769 6770 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 6771 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 6772 6773 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 6774 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6775 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6776 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 6777 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6778 PetscFunctionReturn(0); 6779 } 6780 6781 typedef struct { 6782 Mat *mp; /* intermediate products */ 6783 PetscBool *mptmp; /* is the intermediate product temporary ? */ 6784 PetscInt cp; /* number of intermediate products */ 6785 6786 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6787 PetscInt *startsj_s,*startsj_r; 6788 PetscScalar *bufa; 6789 Mat P_oth; 6790 6791 /* may take advantage of merging product->B */ 6792 Mat Bloc; /* B-local by merging diag and off-diag */ 6793 6794 /* cusparse does not have support to split between symbolic and numeric phases. 
6795 When api_user is true, we don't need to update the numerical values 6796 of the temporary storage */ 6797 PetscBool reusesym; 6798 6799 /* support for COO values insertion */ 6800 PetscScalar *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 6801 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 6802 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 6803 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */ 6804 PetscSF sf; /* used for non-local values insertion and memory malloc */ 6805 PetscMemType mtype; 6806 6807 /* customization */ 6808 PetscBool abmerge; 6809 PetscBool P_oth_bind; 6810 } MatMatMPIAIJBACKEND; 6811 6812 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 6813 { 6814 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data; 6815 PetscInt i; 6816 PetscErrorCode ierr; 6817 6818 PetscFunctionBegin; 6819 ierr = PetscFree2(mmdata->startsj_s,mmdata->startsj_r);CHKERRQ(ierr); 6820 ierr = PetscFree(mmdata->bufa);CHKERRQ(ierr); 6821 ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v);CHKERRQ(ierr); 6822 ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w);CHKERRQ(ierr); 6823 ierr = MatDestroy(&mmdata->P_oth);CHKERRQ(ierr); 6824 ierr = MatDestroy(&mmdata->Bloc);CHKERRQ(ierr); 6825 ierr = PetscSFDestroy(&mmdata->sf);CHKERRQ(ierr); 6826 for (i = 0; i < mmdata->cp; i++) { 6827 ierr = MatDestroy(&mmdata->mp[i]);CHKERRQ(ierr); 6828 } 6829 ierr = PetscFree2(mmdata->mp,mmdata->mptmp);CHKERRQ(ierr); 6830 ierr = PetscFree(mmdata->own[0]);CHKERRQ(ierr); 6831 ierr = PetscFree(mmdata->own);CHKERRQ(ierr); 6832 ierr = PetscFree(mmdata->off[0]);CHKERRQ(ierr); 6833 ierr = PetscFree(mmdata->off);CHKERRQ(ierr); 6834 ierr = PetscFree(mmdata);CHKERRQ(ierr); 6835 PetscFunctionReturn(0); 6836 } 6837 6838 /* Copy selected n entries with indices in idx[] of A to v[]. 
6839 If idx is NULL, copy the whole data array of A to v[] 6840 */ 6841 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 6842 { 6843 PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]); 6844 PetscErrorCode ierr; 6845 6846 PetscFunctionBegin; 6847 ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f);CHKERRQ(ierr); 6848 if (f) { 6849 ierr = (*f)(A,n,idx,v);CHKERRQ(ierr); 6850 } else { 6851 const PetscScalar *vv; 6852 6853 ierr = MatSeqAIJGetArrayRead(A,&vv);CHKERRQ(ierr); 6854 if (n && idx) { 6855 PetscScalar *w = v; 6856 const PetscInt *oi = idx; 6857 PetscInt j; 6858 6859 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 6860 } else { 6861 ierr = PetscArraycpy(v,vv,n);CHKERRQ(ierr); 6862 } 6863 ierr = MatSeqAIJRestoreArrayRead(A,&vv);CHKERRQ(ierr); 6864 } 6865 PetscFunctionReturn(0); 6866 } 6867 6868 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 6869 { 6870 MatMatMPIAIJBACKEND *mmdata; 6871 PetscInt i,n_d,n_o; 6872 PetscErrorCode ierr; 6873 6874 PetscFunctionBegin; 6875 MatCheckProduct(C,1); 6876 PetscCheckFalse(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 6877 mmdata = (MatMatMPIAIJBACKEND*)C->product->data; 6878 if (!mmdata->reusesym) { /* update temporary matrices */ 6879 if (mmdata->P_oth) { 6880 ierr = MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr); 6881 } 6882 if (mmdata->Bloc) { 6883 ierr = MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc);CHKERRQ(ierr); 6884 } 6885 } 6886 mmdata->reusesym = PETSC_FALSE; 6887 6888 for (i = 0; i < mmdata->cp; i++) { 6889 PetscCheckFalse(!mmdata->mp[i]->ops->productnumeric,PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]); 6890 ierr = (*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]);CHKERRQ(ierr); 6891 } 6892 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 6893 PetscInt noff = mmdata->off[i+1] - mmdata->off[i]; 6894 6895 if (mmdata->mptmp[i]) continue; 6896 if (noff) { 6897 PetscInt nown = mmdata->own[i+1] - mmdata->own[i]; 6898 6899 ierr = MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o);CHKERRQ(ierr); 6900 ierr = MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d);CHKERRQ(ierr); 6901 n_o += noff; 6902 n_d += nown; 6903 } else { 6904 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data; 6905 6906 ierr = MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d);CHKERRQ(ierr); 6907 n_d += mm->nz; 6908 } 6909 } 6910 if (mmdata->hasoffproc) { /* offprocess insertion */ 6911 ierr = PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr); 6912 ierr = PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr); 6913 } 6914 ierr = MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES);CHKERRQ(ierr); 6915 PetscFunctionReturn(0); 6916 } 6917 6918 /* Support for Pt * A, A * P, or Pt * A * P */ 6919 #define MAX_NUMBER_INTERMEDIATE 4 6920 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 6921 { 6922 Mat_Product *product = C->product; 6923 Mat A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 6924 Mat_MPIAIJ *a,*p; 6925 MatMatMPIAIJBACKEND *mmdata; 6926 ISLocalToGlobalMapping P_oth_l2g = NULL; 6927 IS glob = NULL; 6928 const char *prefix; 6929 char 
pprefix[256]; 6930 const PetscInt *globidx,*P_oth_idx; 6931 PetscInt i,j,cp,m,n,M,N,*coo_i,*coo_j; 6932 PetscCount ncoo,ncoo_d,ncoo_o,ncoo_oown; 6933 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */ 6934 /* type-0: consecutive, start from 0; type-1: consecutive with */ 6935 /* a base offset; type-2: sparse with a local to global map table */ 6936 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 6937 6938 MatProductType ptype; 6939 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk; 6940 PetscMPIInt size; 6941 PetscErrorCode ierr; 6942 6943 PetscFunctionBegin; 6944 MatCheckProduct(C,1); 6945 PetscCheckFalse(product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 6946 ptype = product->type; 6947 if (product->A->symmetric && ptype == MATPRODUCT_AtB) { 6948 ptype = MATPRODUCT_AB; 6949 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 6950 } 6951 switch (ptype) { 6952 case MATPRODUCT_AB: 6953 A = product->A; 6954 P = product->B; 6955 m = A->rmap->n; 6956 n = P->cmap->n; 6957 M = A->rmap->N; 6958 N = P->cmap->N; 6959 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 6960 break; 6961 case MATPRODUCT_AtB: 6962 P = product->A; 6963 A = product->B; 6964 m = P->cmap->n; 6965 n = A->cmap->n; 6966 M = P->cmap->N; 6967 N = A->cmap->N; 6968 hasoffproc = PETSC_TRUE; 6969 break; 6970 case MATPRODUCT_PtAP: 6971 A = product->A; 6972 P = product->B; 6973 m = P->cmap->n; 6974 n = P->cmap->n; 6975 M = P->cmap->N; 6976 N = P->cmap->N; 6977 hasoffproc = PETSC_TRUE; 6978 break; 6979 default: 6980 SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 6981 } 6982 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)C),&size);CHKERRMPI(ierr); 6983 if (size == 1) hasoffproc = PETSC_FALSE; 6984 6985 /* defaults */ 6986 for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) { 6987 mp[i] = NULL; 6988 mptmp[i] = PETSC_FALSE; 6989 rmapt[i] = -1; 6990 cmapt[i] = -1; 6991 rmapa[i] = NULL; 6992 cmapa[i] = NULL; 6993 } 6994 6995 /* customization */ 6996 ierr = PetscNew(&mmdata);CHKERRQ(ierr); 6997 mmdata->reusesym = product->api_user; 6998 if (ptype == MATPRODUCT_AB) { 6999 if (product->api_user) { 7000 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");CHKERRQ(ierr); 7001 ierr = PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr); 7002 ierr = PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 7003 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7004 } else { 7005 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr); 7006 ierr = PetscOptionsBool("-mat_product_algorithm_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr); 7007 ierr = PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 7008 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7009 } 7010 } else if (ptype == MATPRODUCT_PtAP) { 7011 if (product->api_user) { 7012 ierr = 
PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");CHKERRQ(ierr); 7013 ierr = PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 7014 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7015 } else { 7016 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr); 7017 ierr = PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 7018 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7019 } 7020 } 7021 a = (Mat_MPIAIJ*)A->data; 7022 p = (Mat_MPIAIJ*)P->data; 7023 ierr = MatSetSizes(C,m,n,M,N);CHKERRQ(ierr); 7024 ierr = PetscLayoutSetUp(C->rmap);CHKERRQ(ierr); 7025 ierr = PetscLayoutSetUp(C->cmap);CHKERRQ(ierr); 7026 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 7027 ierr = MatGetOptionsPrefix(C,&prefix);CHKERRQ(ierr); 7028 7029 cp = 0; 7030 switch (ptype) { 7031 case MATPRODUCT_AB: /* A * P */ 7032 ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr); 7033 7034 /* A_diag * P_local (merged or not) */ 7035 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7036 /* P is product->B */ 7037 ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr); 7038 ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 7039 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 7040 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 7041 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr); 7042 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 7043 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 7044 mp[cp]->product->api_user = product->api_user; 7045 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 7046 PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7047 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 7048 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 7049 rmapt[cp] = 1; 7050 cmapt[cp] = 2; 7051 cmapa[cp] = globidx; 7052 mptmp[cp] = PETSC_FALSE; 7053 cp++; 7054 } else { /* A_diag * P_diag and A_diag * P_off */ 7055 ierr = MatProductCreate(a->A,p->A,NULL,&mp[cp]);CHKERRQ(ierr); 7056 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 7057 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 7058 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr); 7059 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 7060 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 7061 mp[cp]->product->api_user = product->api_user; 7062 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 7063 PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7064 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 7065 rmapt[cp] = 1; 7066 cmapt[cp] = 1; 7067 mptmp[cp] = PETSC_FALSE; 7068 cp++; 7069 ierr = MatProductCreate(a->A,p->B,NULL,&mp[cp]);CHKERRQ(ierr); 7070 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 7071 ierr = 
MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 7072 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr); 7073 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 7074 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 7075 mp[cp]->product->api_user = product->api_user; 7076 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 7077 PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7078 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 7079 rmapt[cp] = 1; 7080 cmapt[cp] = 2; 7081 cmapa[cp] = p->garray; 7082 mptmp[cp] = PETSC_FALSE; 7083 cp++; 7084 } 7085 7086 /* A_off * P_other */ 7087 if (mmdata->P_oth) { 7088 ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr); /* make P_oth use local col ids */ 7089 ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr); 7090 ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr); 7091 ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr); 7092 ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr); 7093 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 7094 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 7095 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr); 7096 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 7097 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 7098 mp[cp]->product->api_user = product->api_user; 7099 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 7100 PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7101 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 7102 rmapt[cp] = 1; 7103 cmapt[cp] = 2; 7104 cmapa[cp] = P_oth_idx; 7105 mptmp[cp] = PETSC_FALSE; 7106 cp++; 7107 } 7108 break; 7109 7110 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7111 /* A is product->B */ 7112 ierr = MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr); 7113 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7114 ierr = MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 7115 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 7116 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 7117 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr); 7118 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 7119 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 7120 mp[cp]->product->api_user = product->api_user; 7121 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 7122 PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7123 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 7124 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 7125 rmapt[cp] = 2; 7126 rmapa[cp] = globidx; 7127 cmapt[cp] = 2; 7128 cmapa[cp] = globidx; 7129 mptmp[cp] = PETSC_FALSE; 7130 cp++; 7131 } else { 7132 ierr = MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 7133 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 7134 ierr = 
MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 7135 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr); 7136 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 7137 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 7138 mp[cp]->product->api_user = product->api_user; 7139 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 7140 PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7141 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 7142 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 7143 rmapt[cp] = 1; 7144 cmapt[cp] = 2; 7145 cmapa[cp] = globidx; 7146 mptmp[cp] = PETSC_FALSE; 7147 cp++; 7148 ierr = MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 7149 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 7150 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 7151 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr); 7152 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 7153 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 7154 mp[cp]->product->api_user = product->api_user; 7155 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 7156 PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7157 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 7158 rmapt[cp] = 2; 7159 rmapa[cp] = p->garray; 7160 cmapt[cp] = 2; 7161 cmapa[cp] = globidx; 7162 mptmp[cp] = PETSC_FALSE; 7163 cp++; 7164 } 7165 break; 7166 case MATPRODUCT_PtAP: 7167 ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr); 7168 /* P is product->B */ 7169 ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr); 7170 ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 7171 ierr = MatProductSetType(mp[cp],MATPRODUCT_PtAP);CHKERRQ(ierr); 7172 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 7173 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr); 7174 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 7175 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 7176 mp[cp]->product->api_user = product->api_user; 7177 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 7178 PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7179 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 7180 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 7181 rmapt[cp] = 2; 7182 rmapa[cp] = globidx; 7183 cmapt[cp] = 2; 7184 cmapa[cp] = globidx; 7185 mptmp[cp] = PETSC_FALSE; 7186 cp++; 7187 if (mmdata->P_oth) { 7188 ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr); 7189 ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr); 7190 ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr); 7191 ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr); 7192 ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr); 7193 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 7194 ierr = 
MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 7195 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr); 7196 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 7197 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 7198 mp[cp]->product->api_user = product->api_user; 7199 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 7200 PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7201 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 7202 mptmp[cp] = PETSC_TRUE; 7203 cp++; 7204 ierr = MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]);CHKERRQ(ierr); 7205 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 7206 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 7207 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr); 7208 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 7209 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 7210 mp[cp]->product->api_user = product->api_user; 7211 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 7212 PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7213 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 7214 rmapt[cp] = 2; 7215 rmapa[cp] = globidx; 7216 cmapt[cp] = 2; 7217 cmapa[cp] = P_oth_idx; 7218 mptmp[cp] = PETSC_FALSE; 7219 cp++; 7220 } 7221 break; 7222 default: 7223 SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 7224 } 7225 /* sanity check */ 7226 if (size > 1) for (i = 0; i < cp; i++) PetscCheckFalse(rmapt[i] == 2 && !hasoffproc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %" PetscInt_FMT,i); 7227 7228 ierr = PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp);CHKERRQ(ierr); 7229 for (i = 0; i < cp; i++) { 7230 mmdata->mp[i] = mp[i]; 7231 mmdata->mptmp[i] = mptmp[i]; 7232 } 7233 mmdata->cp = cp; 7234 C->product->data = mmdata; 7235 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7236 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7237 7238 /* memory type */ 7239 mmdata->mtype = PETSC_MEMTYPE_HOST; 7240 ierr = PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");CHKERRQ(ierr); 7241 ierr = PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"");CHKERRQ(ierr); 7242 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7243 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 7244 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_DEVICE; 7245 #endif 7246 7247 /* prepare coo coordinates for values insertion */ 7248 7249 /* count total nonzeros of those intermediate seqaij Mats 7250 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7251 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7252 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7253 */ 7254 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7255 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 7256 if (mptmp[cp]) continue; 7257 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */ 7258 const PetscInt *rmap = rmapa[cp]; 7259 const PetscInt 
mr = mp[cp]->rmap->n; 7260 const PetscInt rs = C->rmap->rstart; 7261 const PetscInt re = C->rmap->rend; 7262 const PetscInt *ii = mm->i; 7263 for (i = 0; i < mr; i++) { 7264 const PetscInt gr = rmap[i]; 7265 const PetscInt nz = ii[i+1] - ii[i]; 7266 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7267 else ncoo_oown += nz; /* this row is local */ 7268 } 7269 } else ncoo_d += mm->nz; 7270 } 7271 7272 /* 7273 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7274 7275 ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted to me by other procs. 7276 7277 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0]. 7278 7279 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7280 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7281 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7282 7283 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7284 Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive. 7285 */ 7286 ierr = PetscCalloc1(mmdata->cp+1,&mmdata->off);CHKERRQ(ierr); /* +1 to make a csr-like data structure */ 7287 ierr = PetscCalloc1(mmdata->cp+1,&mmdata->own);CHKERRQ(ierr); 7288 7289 /* gather (i,j) of nonzeros inserted by remote procs */ 7290 if (hasoffproc) { 7291 PetscSF msf; 7292 PetscInt ncoo2,*coo_i2,*coo_j2; 7293 7294 ierr = PetscMalloc1(ncoo_o,&mmdata->off[0]);CHKERRQ(ierr); 7295 ierr = PetscMalloc1(ncoo_oown,&mmdata->own[0]);CHKERRQ(ierr); 7296 ierr = PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j);CHKERRQ(ierr); /* to collect (i,j) of entries to be sent to others */ 7297 7298 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7299 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 7300 PetscInt *idxoff = mmdata->off[cp]; 7301 PetscInt *idxown = mmdata->own[cp]; 7302 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7303 const PetscInt *rmap = rmapa[cp]; 7304 const PetscInt *cmap = cmapa[cp]; 7305 const PetscInt *ii = mm->i; 7306 PetscInt *coi = coo_i + ncoo_o; 7307 PetscInt *coj = coo_j + ncoo_o; 7308 const PetscInt mr = mp[cp]->rmap->n; 7309 const PetscInt rs = C->rmap->rstart; 7310 const PetscInt re = C->rmap->rend; 7311 const PetscInt cs = C->cmap->rstart; 7312 for (i = 0; i < mr; i++) { 7313 const PetscInt *jj = mm->j + ii[i]; 7314 const PetscInt gr = rmap[i]; 7315 const PetscInt nz = ii[i+1] - ii[i]; 7316 if (gr < rs || gr >= re) { /* this is an offproc row */ 7317 for (j = ii[i]; j < ii[i+1]; j++) { 7318 *coi++ = gr; 7319 *idxoff++ = j; 7320 } 7321 if (!cmapt[cp]) { /* already global */ 7322 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7323 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7324 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7325 } else { /* offdiag */ 7326 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7327 } 7328 ncoo_o += nz; 7329 } else { /* this is a local row */ 7330 for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j; 7331 } 7332 } 7333 } 7334 mmdata->off[cp + 1] = idxoff; 7335 mmdata->own[cp + 1] = idxown; 7336 } 7337 7338 ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr); 7339 ierr = PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i);CHKERRQ(ierr); 7340 ierr = 
PetscSFGetMultiSF(mmdata->sf,&msf);CHKERRQ(ierr); 7341 ierr = PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL);CHKERRQ(ierr); 7342 ncoo = ncoo_d + ncoo_oown + ncoo2; 7343 ierr = PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2);CHKERRQ(ierr); 7344 ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); /* put (i,j) of remote nonzeros at back */ 7345 ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); 7346 ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); 7347 ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); 7348 ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr); 7349 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7350 ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w);CHKERRQ(ierr); 7351 coo_i = coo_i2; 7352 coo_j = coo_j2; 7353 } else { /* no offproc values insertion */ 7354 ncoo = ncoo_d; 7355 ierr = PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j);CHKERRQ(ierr); 7356 7357 ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr); 7358 ierr = PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER);CHKERRQ(ierr); 7359 ierr = PetscSFSetUp(mmdata->sf);CHKERRQ(ierr); 7360 } 7361 mmdata->hasoffproc = hasoffproc; 7362 7363 /* gather (i,j) of nonzeros inserted locally */ 7364 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7365 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 7366 PetscInt *coi = coo_i + ncoo_d; 7367 PetscInt *coj = coo_j + ncoo_d; 7368 const PetscInt *jj = mm->j; 7369 const PetscInt *ii = mm->i; 7370 const PetscInt *cmap = cmapa[cp]; 7371 const PetscInt *rmap = rmapa[cp]; 7372 const PetscInt mr = mp[cp]->rmap->n; 7373 const PetscInt rs = C->rmap->rstart; 7374 const PetscInt re = C->rmap->rend; 7375 const PetscInt cs = C->cmap->rstart; 7376 7377 if (mptmp[cp]) continue; 7378 if (rmapt[cp] == 1) { /* consecutive rows */ 7379 /* fill coo_i */ 7380 for (i = 0; i < mr; i++) { 7381 const PetscInt gr = i + rs; 7382 for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr; 7383 } 7384 /* fill coo_j */ 7385 if (!cmapt[cp]) { /* type-0, already global */ 7386 ierr = PetscArraycpy(coj,jj,mm->nz);CHKERRQ(ierr); 7387 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7388 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7389 } else { /* type-2, local to global for sparse columns */ 7390 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7391 } 7392 ncoo_d += mm->nz; 7393 } else if (rmapt[cp] == 2) { /* sparse rows */ 7394 for (i = 0; i < mr; i++) { 7395 const PetscInt *jj = mm->j + ii[i]; 7396 const PetscInt gr = rmap[i]; 7397 const PetscInt nz = ii[i+1] - ii[i]; 7398 if (gr >= rs && gr < re) { /* local rows */ 7399 for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr; 7400 if (!cmapt[cp]) { /* type-0, already global */ 7401 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7402 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7403 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7404 } else { /* type-2, local to global for sparse columns */ 7405 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7406 } 7407 ncoo_d += nz; 7408 } 7409 } 7410 } 7411 } 7412 if (glob) { 7413 ierr = ISRestoreIndices(glob,&globidx);CHKERRQ(ierr); 7414 } 7415 ierr = ISDestroy(&glob);CHKERRQ(ierr); 7416 if (P_oth_l2g) { 7417 ierr = 
ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr); 7418 } 7419 ierr = ISLocalToGlobalMappingDestroy(&P_oth_l2g);CHKERRQ(ierr); 7420 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7421 ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v);CHKERRQ(ierr); 7422 7423 /* preallocate with COO data */ 7424 ierr = MatSetPreallocationCOO(C,ncoo,coo_i,coo_j);CHKERRQ(ierr); 7425 ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr); 7426 PetscFunctionReturn(0); 7427 } 7428 7429 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7430 { 7431 Mat_Product *product = mat->product; 7432 PetscErrorCode ierr; 7433 #if defined(PETSC_HAVE_DEVICE) 7434 PetscBool match = PETSC_FALSE; 7435 PetscBool usecpu = PETSC_FALSE; 7436 #else 7437 PetscBool match = PETSC_TRUE; 7438 #endif 7439 7440 PetscFunctionBegin; 7441 MatCheckProduct(mat,1); 7442 #if defined(PETSC_HAVE_DEVICE) 7443 if (!product->A->boundtocpu && !product->B->boundtocpu) { 7444 ierr = PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match);CHKERRQ(ierr); 7445 } 7446 if (match) { /* we can always fallback to the CPU if requested */ 7447 switch (product->type) { 7448 case MATPRODUCT_AB: 7449 if (product->api_user) { 7450 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");CHKERRQ(ierr); 7451 ierr = PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7452 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7453 } else { 7454 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr); 7455 ierr = PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7456 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7457 } 7458 break; 7459 case MATPRODUCT_AtB: 7460 if (product->api_user) { 7461 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");CHKERRQ(ierr); 7462 ierr = PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7463 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7464 } else { 7465 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");CHKERRQ(ierr); 7466 ierr = PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7467 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7468 } 7469 break; 7470 case MATPRODUCT_PtAP: 7471 if (product->api_user) { 7472 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");CHKERRQ(ierr); 7473 ierr = PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7474 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7475 } else { 7476 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr); 7477 ierr = PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7478 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7479 } 7480 break; 7481 default: 7482 break; 7483 } 7484 match = (PetscBool)!usecpu; 7485 } 7486 #endif 7487 if (match) { 7488 switch (product->type) { 7489 case MATPRODUCT_AB: 7490 case MATPRODUCT_AtB: 7491 case 
MATPRODUCT_PtAP: 7492 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7493 break; 7494 default: 7495 break; 7496 } 7497 } 7498 /* fallback to MPIAIJ ops */ 7499 if (!mat->ops->productsymbolic) { 7500 ierr = MatProductSetFromOptions_MPIAIJ(mat);CHKERRQ(ierr); 7501 } 7502 PetscFunctionReturn(0); 7503 } 7504 7505 /* 7506 Special version for direct calls from Fortran 7507 */ 7508 #include <petsc/private/fortranimpl.h> 7509 7510 /* Change these macros so can be used in void function */ 7511 /* Identical to CHKERRV, except it assigns to *_ierr */ 7512 #undef CHKERRQ 7513 #define CHKERRQ(ierr) do { \ 7514 PetscErrorCode ierr_msv_mpiaij = (ierr); \ 7515 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 7516 *_ierr = PetscError(PETSC_COMM_SELF,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr_msv_mpiaij,PETSC_ERROR_REPEAT," "); \ 7517 return; \ 7518 } \ 7519 } while (0) 7520 7521 #undef SETERRQ 7522 #define SETERRQ(comm,ierr,...) do { \ 7523 *_ierr = PetscError(comm,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr,PETSC_ERROR_INITIAL,__VA_ARGS__); \ 7524 return; \ 7525 } while (0) 7526 7527 #if defined(PETSC_HAVE_FORTRAN_CAPS) 7528 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 7529 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 7530 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 7531 #else 7532 #endif 7533 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 7534 { 7535 Mat mat = *mmat; 7536 PetscInt m = *mm, n = *mn; 7537 InsertMode addv = *maddv; 7538 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 7539 PetscScalar value; 7540 PetscErrorCode ierr; 7541 7542 MatCheckPreallocated(mat,1); 7543 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 7544 else PetscCheckFalse(mat->insertmode != addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 7545 { 7546 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 7547 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 7548 PetscBool roworiented = aij->roworiented; 7549 7550 /* Some Variables required in the macro */ 7551 Mat A = aij->A; 7552 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 7553 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 7554 MatScalar *aa; 7555 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 7556 Mat B = aij->B; 7557 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 7558 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 7559 MatScalar *ba; 7560 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 7561 * cannot use "#if defined" inside a macro. 
*/ 7562 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 7563 7564 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 7565 PetscInt nonew = a->nonew; 7566 MatScalar *ap1,*ap2; 7567 7568 PetscFunctionBegin; 7569 ierr = MatSeqAIJGetArray(A,&aa);CHKERRQ(ierr); 7570 ierr = MatSeqAIJGetArray(B,&ba);CHKERRQ(ierr); 7571 for (i=0; i<m; i++) { 7572 if (im[i] < 0) continue; 7573 PetscAssertFalse(im[i] >= mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 7574 if (im[i] >= rstart && im[i] < rend) { 7575 row = im[i] - rstart; 7576 lastcol1 = -1; 7577 rp1 = aj + ai[row]; 7578 ap1 = aa + ai[row]; 7579 rmax1 = aimax[row]; 7580 nrow1 = ailen[row]; 7581 low1 = 0; 7582 high1 = nrow1; 7583 lastcol2 = -1; 7584 rp2 = bj + bi[row]; 7585 ap2 = ba + bi[row]; 7586 rmax2 = bimax[row]; 7587 nrow2 = bilen[row]; 7588 low2 = 0; 7589 high2 = nrow2; 7590 7591 for (j=0; j<n; j++) { 7592 if (roworiented) value = v[i*n+j]; 7593 else value = v[i+j*m]; 7594 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 7595 if (in[j] >= cstart && in[j] < cend) { 7596 col = in[j] - cstart; 7597 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 7598 } else if (in[j] < 0) continue; 7599 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 7600 /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */ 7601 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 7602 } else { 7603 if (mat->was_assembled) { 7604 if (!aij->colmap) { 7605 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 7606 } 7607 #if defined(PETSC_USE_CTABLE) 7608 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 7609 col--; 7610 #else 7611 col = aij->colmap[in[j]] - 1; 7612 #endif 7613 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 7614 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 7615 col = in[j]; 7616 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 7617 B = aij->B; 7618 b = (Mat_SeqAIJ*)B->data; 7619 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 7620 rp2 = bj + bi[row]; 7621 ap2 = ba + bi[row]; 7622 rmax2 = bimax[row]; 7623 nrow2 = bilen[row]; 7624 low2 = 0; 7625 high2 = nrow2; 7626 bm = aij->B->rmap->n; 7627 ba = b->a; 7628 inserted = PETSC_FALSE; 7629 } 7630 } else col = in[j]; 7631 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 7632 } 7633 } 7634 } else if (!aij->donotstash) { 7635 if (roworiented) { 7636 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 7637 } else { 7638 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 7639 } 7640 } 7641 } 7642 ierr = MatSeqAIJRestoreArray(A,&aa);CHKERRQ(ierr); 7643 ierr = MatSeqAIJRestoreArray(B,&ba);CHKERRQ(ierr); 7644 } 7645 PetscFunctionReturnVoid(); 7646 } 7647 /* Undefining these here since they were redefined from their original definition above! No 7648 * other PETSc functions should be defined past this point, as it is impossible to recover the 7649 * original definitions */ 7650 #undef CHKERRQ 7651 #undef SETERRQ 7652
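/*
   Illustrative usage sketch (exposition only, not compiled as part of this file). It shows the
   typical user-level call sequence that reaches the backend product path implemented above,
   assuming the matrices are of a device-based AIJ type such as MATMPIAIJCUSPARSE or
   MATMPIAIJKOKKOS (the types queried at the memory-type check in MatProductSymbolic_MPIAIJBACKEND).
   A and P below are hypothetical user matrices; error checking is omitted for brevity.

     Mat C;
     MatProductCreate(A,P,NULL,&C);           // C will hold the product of A and P
     MatProductSetType(C,MATPRODUCT_PtAP);    // or MATPRODUCT_AB / MATPRODUCT_AtB
     MatProductSetFromOptions(C);             // may install MatProductSymbolic_MPIAIJBACKEND
     MatProductSymbolic(C);                   // builds the intermediate local products and the COO pattern
     MatProductNumeric(C);                    // fills the values through MatSetValuesCOO()

   Run-time options handled above include -matmatmult_backend_cpu, -mattransposematmult_backend_cpu
   and -matptap_backend_cpu (fall back to the MPIAIJ CPU implementation), -matmatmult_backend_mergeB
   (merge the local blocks of product->B before multiplying), and -matmatmult_backend_pothbind /
   -matptap_backend_pothbind (bind P_oth to the CPU).
*/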