1 2 3 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 4 #include <petsc/private/vecimpl.h> 5 #include <petsc/private/isimpl.h> 6 #include <petscblaslapack.h> 7 #include <petscsf.h> 8 9 /*MC 10 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 11 12 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 13 and MATMPIAIJ otherwise. As a result, for single process communicators, 14 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 15 for communicators controlling multiple processes. It is recommended that you call both of 16 the above preallocation routines for simplicity. 17 18 Options Database Keys: 19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 20 21 Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when 22 enough exist. 23 24 Level: beginner 25 26 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ 27 M*/ 28 29 /*MC 30 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 31 32 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 33 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 34 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 35 for communicators controlling multiple processes. It is recommended that you call both of 36 the above preallocation routines for simplicity. 37 38 Options Database Keys: 39 . 
-mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 40 41 Level: beginner 42 43 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 44 M*/ 45 46 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 47 { 48 PetscErrorCode ierr; 49 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 50 51 PetscFunctionBegin; 52 if (mat->A) { 53 ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr); 54 ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr); 55 } 56 PetscFunctionReturn(0); 57 } 58 59 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 60 { 61 PetscErrorCode ierr; 62 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 63 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 64 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 65 const PetscInt *ia,*ib; 66 const MatScalar *aa,*bb; 67 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 68 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 69 70 PetscFunctionBegin; 71 *keptrows = 0; 72 ia = a->i; 73 ib = b->i; 74 for (i=0; i<m; i++) { 75 na = ia[i+1] - ia[i]; 76 nb = ib[i+1] - ib[i]; 77 if (!na && !nb) { 78 cnt++; 79 goto ok1; 80 } 81 aa = a->a + ia[i]; 82 for (j=0; j<na; j++) { 83 if (aa[j] != 0.0) goto ok1; 84 } 85 bb = b->a + ib[i]; 86 for (j=0; j <nb; j++) { 87 if (bb[j] != 0.0) goto ok1; 88 } 89 cnt++; 90 ok1:; 91 } 92 ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 93 if (!n0rows) PetscFunctionReturn(0); 94 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 95 cnt = 0; 96 for (i=0; i<m; i++) { 97 na = ia[i+1] - ia[i]; 98 nb = ib[i+1] - ib[i]; 99 if (!na && !nb) continue; 100 aa = a->a + ia[i]; 101 for (j=0; j<na;j++) { 102 if (aa[j] != 0.0) { 103 rows[cnt++] = rstart + i; 104 goto ok2; 105 } 106 } 107 bb = b->a + ib[i]; 108 for (j=0; j<nb; j++) { 109 if (bb[j] != 0.0) { 110 rows[cnt++] = rstart + i; 111 goto ok2; 112 } 113 } 114 ok2:; 115 } 116 ierr = 
ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 117 PetscFunctionReturn(0); 118 } 119 120 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 121 { 122 PetscErrorCode ierr; 123 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 124 125 PetscFunctionBegin; 126 if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) { 127 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 128 } else { 129 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 130 } 131 PetscFunctionReturn(0); 132 } 133 134 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 135 { 136 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 137 PetscErrorCode ierr; 138 PetscInt i,rstart,nrows,*rows; 139 140 PetscFunctionBegin; 141 *zrows = NULL; 142 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 143 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 144 for (i=0; i<nrows; i++) rows[i] += rstart; 145 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 146 PetscFunctionReturn(0); 147 } 148 149 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 150 { 151 PetscErrorCode ierr; 152 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 153 PetscInt i,n,*garray = aij->garray; 154 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 155 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 156 PetscReal *work; 157 158 PetscFunctionBegin; 159 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 160 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 161 if (type == NORM_2) { 162 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 163 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 164 } 165 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 166 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 167 } 168 } else if (type == NORM_1) { 169 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 170 work[A->cmap->rstart + 
a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 171 } 172 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 173 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 174 } 175 } else if (type == NORM_INFINITY) { 176 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 177 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 178 } 179 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 180 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 181 } 182 183 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 184 if (type == NORM_INFINITY) { 185 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 186 } else { 187 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 188 } 189 ierr = PetscFree(work);CHKERRQ(ierr); 190 if (type == NORM_2) { 191 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 192 } 193 PetscFunctionReturn(0); 194 } 195 196 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 197 { 198 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 199 IS sis,gis; 200 PetscErrorCode ierr; 201 const PetscInt *isis,*igis; 202 PetscInt n,*iis,nsis,ngis,rstart,i; 203 204 PetscFunctionBegin; 205 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 206 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 207 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 208 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 209 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 210 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 211 212 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 213 ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr); 214 ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr); 215 n = ngis + nsis; 216 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 217 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 218 for 
(i=0; i<n; i++) iis[i] += rstart; 219 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 220 221 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 222 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 223 ierr = ISDestroy(&sis);CHKERRQ(ierr); 224 ierr = ISDestroy(&gis);CHKERRQ(ierr); 225 PetscFunctionReturn(0); 226 } 227 228 /* 229 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 230 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 231 232 Only for square matrices 233 234 Used by a preconditioner, hence PETSC_EXTERN 235 */ 236 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 237 { 238 PetscMPIInt rank,size; 239 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 240 PetscErrorCode ierr; 241 Mat mat; 242 Mat_SeqAIJ *gmata; 243 PetscMPIInt tag; 244 MPI_Status status; 245 PetscBool aij; 246 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 247 248 PetscFunctionBegin; 249 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 250 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 251 if (!rank) { 252 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 253 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 254 } 255 if (reuse == MAT_INITIAL_MATRIX) { 256 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 257 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 258 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 259 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 260 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 261 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 262 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 263 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 264 ierr = 
MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 265 266 rowners[0] = 0; 267 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 268 rstart = rowners[rank]; 269 rend = rowners[rank+1]; 270 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 271 if (!rank) { 272 gmata = (Mat_SeqAIJ*) gmat->data; 273 /* send row lengths to all processors */ 274 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 275 for (i=1; i<size; i++) { 276 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 277 } 278 /* determine number diagonal and off-diagonal counts */ 279 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 280 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 281 jj = 0; 282 for (i=0; i<m; i++) { 283 for (j=0; j<dlens[i]; j++) { 284 if (gmata->j[jj] < rstart) ld[i]++; 285 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 286 jj++; 287 } 288 } 289 /* send column indices to other processes */ 290 for (i=1; i<size; i++) { 291 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 292 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 293 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 294 } 295 296 /* send numerical values to other processes */ 297 for (i=1; i<size; i++) { 298 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 299 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 300 } 301 gmataa = gmata->a; 302 gmataj = gmata->j; 303 304 } else { 305 /* receive row lengths */ 306 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 307 /* receive column indices */ 308 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 309 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 310 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 311 /* determine number diagonal and off-diagonal counts */ 312 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 313 
ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 314 jj = 0; 315 for (i=0; i<m; i++) { 316 for (j=0; j<dlens[i]; j++) { 317 if (gmataj[jj] < rstart) ld[i]++; 318 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 319 jj++; 320 } 321 } 322 /* receive numerical values */ 323 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 324 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 325 } 326 /* set preallocation */ 327 for (i=0; i<m; i++) { 328 dlens[i] -= olens[i]; 329 } 330 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 331 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 332 333 for (i=0; i<m; i++) { 334 dlens[i] += olens[i]; 335 } 336 cnt = 0; 337 for (i=0; i<m; i++) { 338 row = rstart + i; 339 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 340 cnt += dlens[i]; 341 } 342 if (rank) { 343 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 344 } 345 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 346 ierr = PetscFree(rowners);CHKERRQ(ierr); 347 348 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 349 350 *inmat = mat; 351 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 352 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 353 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 354 mat = *inmat; 355 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 356 if (!rank) { 357 /* send numerical values to other processes */ 358 gmata = (Mat_SeqAIJ*) gmat->data; 359 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 360 gmataa = gmata->a; 361 for (i=1; i<size; i++) { 362 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 363 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 364 } 365 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 366 } else { 367 /* receive numerical values from process 0*/ 368 nz = Ad->nz + 
Ao->nz; 369 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 370 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 371 } 372 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 373 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 374 ad = Ad->a; 375 ao = Ao->a; 376 if (mat->rmap->n) { 377 i = 0; 378 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 379 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 380 } 381 for (i=1; i<mat->rmap->n; i++) { 382 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 383 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 384 } 385 i--; 386 if (mat->rmap->n) { 387 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 388 } 389 if (rank) { 390 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 391 } 392 } 393 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 394 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 395 PetscFunctionReturn(0); 396 } 397 398 /* 399 Local utility routine that creates a mapping from the global column 400 number to the local number in the off-diagonal part of the local 401 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 402 a slightly higher hash table cost; without it it is not scalable (each processor 403 has an order N integer array but is fast to acess. 
404 */ 405 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 406 { 407 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 408 PetscErrorCode ierr; 409 PetscInt n = aij->B->cmap->n,i; 410 411 PetscFunctionBegin; 412 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 413 #if defined(PETSC_USE_CTABLE) 414 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 415 for (i=0; i<n; i++) { 416 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 417 } 418 #else 419 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 420 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 421 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 422 #endif 423 PetscFunctionReturn(0); 424 } 425 426 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 427 { \ 428 if (col <= lastcol1) low1 = 0; \ 429 else high1 = nrow1; \ 430 lastcol1 = col;\ 431 while (high1-low1 > 5) { \ 432 t = (low1+high1)/2; \ 433 if (rp1[t] > col) high1 = t; \ 434 else low1 = t; \ 435 } \ 436 for (_i=low1; _i<high1; _i++) { \ 437 if (rp1[_i] > col) break; \ 438 if (rp1[_i] == col) { \ 439 if (addv == ADD_VALUES) ap1[_i] += value; \ 440 else ap1[_i] = value; \ 441 goto a_noinsert; \ 442 } \ 443 } \ 444 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 445 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 446 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 447 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 448 N = nrow1++ - 1; a->nz++; high1++; \ 449 /* shift up all the later entries in this row */ \ 450 for (ii=N; ii>=_i; ii--) { \ 451 rp1[ii+1] = rp1[ii]; \ 452 ap1[ii+1] = ap1[ii]; \ 453 } \ 454 rp1[_i] = col; \ 455 ap1[_i] = value; \ 456 
A->nonzerostate++;\ 457 a_noinsert: ; \ 458 ailen[row] = nrow1; \ 459 } 460 461 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 462 { \ 463 if (col <= lastcol2) low2 = 0; \ 464 else high2 = nrow2; \ 465 lastcol2 = col; \ 466 while (high2-low2 > 5) { \ 467 t = (low2+high2)/2; \ 468 if (rp2[t] > col) high2 = t; \ 469 else low2 = t; \ 470 } \ 471 for (_i=low2; _i<high2; _i++) { \ 472 if (rp2[_i] > col) break; \ 473 if (rp2[_i] == col) { \ 474 if (addv == ADD_VALUES) ap2[_i] += value; \ 475 else ap2[_i] = value; \ 476 goto b_noinsert; \ 477 } \ 478 } \ 479 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 480 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 481 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 482 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 483 N = nrow2++ - 1; b->nz++; high2++; \ 484 /* shift up all the later entries in this row */ \ 485 for (ii=N; ii>=_i; ii--) { \ 486 rp2[ii+1] = rp2[ii]; \ 487 ap2[ii+1] = ap2[ii]; \ 488 } \ 489 rp2[_i] = col; \ 490 ap2[_i] = value; \ 491 B->nonzerostate++; \ 492 b_noinsert: ; \ 493 bilen[row] = nrow2; \ 494 } 495 496 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 497 { 498 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 499 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 500 PetscErrorCode ierr; 501 PetscInt l,*garray = mat->garray,diag; 502 503 PetscFunctionBegin; 504 /* code only works for square matrices A */ 505 506 /* find size of row to the left of the diagonal part */ 507 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 508 row = row - diag; 509 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 510 if (garray[b->j[b->i[row]+l]] > diag) break; 511 } 512 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 513 514 /* diagonal 
part */ 515 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 516 517 /* right of diagonal part */ 518 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 519 PetscFunctionReturn(0); 520 } 521 522 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 523 { 524 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 525 PetscScalar value; 526 PetscErrorCode ierr; 527 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 528 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 529 PetscBool roworiented = aij->roworiented; 530 531 /* Some Variables required in the macro */ 532 Mat A = aij->A; 533 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 534 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 535 MatScalar *aa = a->a; 536 PetscBool ignorezeroentries = a->ignorezeroentries; 537 Mat B = aij->B; 538 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 539 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 540 MatScalar *ba = b->a; 541 542 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 543 PetscInt nonew; 544 MatScalar *ap1,*ap2; 545 546 PetscFunctionBegin; 547 for (i=0; i<m; i++) { 548 if (im[i] < 0) continue; 549 #if defined(PETSC_USE_DEBUG) 550 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 551 #endif 552 if (im[i] >= rstart && im[i] < rend) { 553 row = im[i] - rstart; 554 lastcol1 = -1; 555 rp1 = aj + ai[row]; 556 ap1 = aa + ai[row]; 557 rmax1 = aimax[row]; 558 nrow1 = ailen[row]; 559 low1 = 0; 560 high1 = nrow1; 561 lastcol2 = -1; 562 rp2 = bj + bi[row]; 563 ap2 = ba + bi[row]; 564 rmax2 = bimax[row]; 565 nrow2 = bilen[row]; 566 low2 = 0; 567 high2 = nrow2; 568 569 for 
(j=0; j<n; j++) { 570 if (roworiented) value = v[i*n+j]; 571 else value = v[i+j*m]; 572 if (in[j] >= cstart && in[j] < cend) { 573 col = in[j] - cstart; 574 nonew = a->nonew; 575 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 576 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 577 } else if (in[j] < 0) continue; 578 #if defined(PETSC_USE_DEBUG) 579 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 580 #endif 581 else { 582 if (mat->was_assembled) { 583 if (!aij->colmap) { 584 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 585 } 586 #if defined(PETSC_USE_CTABLE) 587 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 588 col--; 589 #else 590 col = aij->colmap[in[j]] - 1; 591 #endif 592 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 593 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 594 col = in[j]; 595 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 596 B = aij->B; 597 b = (Mat_SeqAIJ*)B->data; 598 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 599 rp2 = bj + bi[row]; 600 ap2 = ba + bi[row]; 601 rmax2 = bimax[row]; 602 nrow2 = bilen[row]; 603 low2 = 0; 604 high2 = nrow2; 605 bm = aij->B->rmap->n; 606 ba = b->a; 607 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 608 } else col = in[j]; 609 nonew = b->nonew; 610 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 611 } 612 } 613 } else { 614 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 615 if (!aij->donotstash) { 616 mat->assembled = PETSC_FALSE; 617 if (roworiented) { 618 ierr = 
MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 619 } else { 620 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 621 } 622 } 623 } 624 } 625 PetscFunctionReturn(0); 626 } 627 628 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 629 { 630 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 631 PetscErrorCode ierr; 632 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 633 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 634 635 PetscFunctionBegin; 636 for (i=0; i<m; i++) { 637 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 638 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 639 if (idxm[i] >= rstart && idxm[i] < rend) { 640 row = idxm[i] - rstart; 641 for (j=0; j<n; j++) { 642 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 643 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 644 if (idxn[j] >= cstart && idxn[j] < cend) { 645 col = idxn[j] - cstart; 646 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 647 } else { 648 if (!aij->colmap) { 649 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 650 } 651 #if defined(PETSC_USE_CTABLE) 652 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 653 col--; 654 #else 655 col = aij->colmap[idxn[j]] - 1; 656 #endif 657 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 658 else { 659 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 660 } 661 } 662 } 663 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only 
local values currently supported"); 664 } 665 PetscFunctionReturn(0); 666 } 667 668 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 669 670 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 671 { 672 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 673 PetscErrorCode ierr; 674 PetscInt nstash,reallocs; 675 676 PetscFunctionBegin; 677 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 678 679 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 680 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 681 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 682 PetscFunctionReturn(0); 683 } 684 685 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 686 { 687 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 688 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 689 PetscErrorCode ierr; 690 PetscMPIInt n; 691 PetscInt i,j,rstart,ncols,flg; 692 PetscInt *row,*col; 693 PetscBool other_disassembled; 694 PetscScalar *val; 695 696 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 697 698 PetscFunctionBegin; 699 if (!aij->donotstash && !mat->nooffprocentries) { 700 while (1) { 701 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 702 if (!flg) break; 703 704 for (i=0; i<n; ) { 705 /* Now identify the consecutive vals belonging to the same row */ 706 for (j=i,rstart=row[j]; j<n; j++) { 707 if (row[j] != rstart) break; 708 } 709 if (j < n) ncols = j-i; 710 else ncols = n-i; 711 /* Now assemble all these values with a single function call */ 712 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 713 714 i = j; 715 } 716 } 717 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 718 } 719 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 720 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 721 722 /* determine 
if any processor has disassembled, if so we must 723 also disassemble ourselfs, in order that we may reassemble. */ 724 /* 725 if nonzero structure of submatrix B cannot change then we know that 726 no processor disassembled thus we can skip this stuff 727 */ 728 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 729 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 730 if (mat->was_assembled && !other_disassembled) { 731 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 732 } 733 } 734 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 735 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 736 } 737 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 738 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 739 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 740 741 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 742 743 aij->rowvalues = 0; 744 745 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 746 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 747 748 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 749 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 750 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 751 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 752 } 753 PetscFunctionReturn(0); 754 } 755 756 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 757 { 758 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 759 PetscErrorCode ierr; 760 761 PetscFunctionBegin; 762 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 763 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 764 PetscFunctionReturn(0); 765 } 766 767 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 768 { 769 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) 
A->data; 770 PetscInt *lrows; 771 PetscInt r, len; 772 PetscErrorCode ierr; 773 774 PetscFunctionBegin; 775 /* get locally owned rows */ 776 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 777 /* fix right hand side if needed */ 778 if (x && b) { 779 const PetscScalar *xx; 780 PetscScalar *bb; 781 782 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 783 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 784 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 785 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 786 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 787 } 788 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 789 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 790 if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */ 791 PetscBool cong; 792 ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr); 793 if (cong) A->congruentlayouts = 1; 794 else A->congruentlayouts = 0; 795 } 796 if ((diag != 0.0) && A->congruentlayouts) { 797 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 798 } else if (diag != 0.0) { 799 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 800 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 801 for (r = 0; r < len; ++r) { 802 const PetscInt row = lrows[r] + A->rmap->rstart; 803 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 804 } 805 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 806 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 807 } else { 808 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 809 } 810 ierr = PetscFree(lrows);CHKERRQ(ierr); 811 812 /* only change matrix nonzero state if pattern was allowed 
to be changed */ 813 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 814 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 815 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 816 } 817 PetscFunctionReturn(0); 818 } 819 820 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 821 { 822 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 823 PetscErrorCode ierr; 824 PetscMPIInt n = A->rmap->n; 825 PetscInt i,j,r,m,p = 0,len = 0; 826 PetscInt *lrows,*owners = A->rmap->range; 827 PetscSFNode *rrows; 828 PetscSF sf; 829 const PetscScalar *xx; 830 PetscScalar *bb,*mask; 831 Vec xmask,lmask; 832 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 833 const PetscInt *aj, *ii,*ridx; 834 PetscScalar *aa; 835 836 PetscFunctionBegin; 837 /* Create SF where leaves are input rows and roots are owned rows */ 838 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 839 for (r = 0; r < n; ++r) lrows[r] = -1; 840 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 841 for (r = 0; r < N; ++r) { 842 const PetscInt idx = rows[r]; 843 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 844 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 845 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 846 } 847 rrows[r].rank = p; 848 rrows[r].index = rows[r] - owners[p]; 849 } 850 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 851 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 852 /* Collect flags for rows to be zeroed */ 853 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 854 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 855 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 856 /* 
Compress and put in row numbers */ 857 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 858 /* zero diagonal part of matrix */ 859 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 860 /* handle off diagonal part of matrix */ 861 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 862 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 863 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 864 for (i=0; i<len; i++) bb[lrows[i]] = 1; 865 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 866 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 867 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 868 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 869 if (x) { 870 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 871 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 872 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 873 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 874 } 875 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 876 /* remove zeroed rows of off diagonal matrix */ 877 ii = aij->i; 878 for (i=0; i<len; i++) { 879 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 880 } 881 /* loop over all elements of off process part of matrix zeroing removed columns*/ 882 if (aij->compressedrow.use) { 883 m = aij->compressedrow.nrows; 884 ii = aij->compressedrow.i; 885 ridx = aij->compressedrow.rindex; 886 for (i=0; i<m; i++) { 887 n = ii[i+1] - ii[i]; 888 aj = aij->j + ii[i]; 889 aa = aij->a + ii[i]; 890 891 for (j=0; j<n; j++) { 892 if (PetscAbsScalar(mask[*aj])) { 893 if (b) bb[*ridx] -= *aa*xx[*aj]; 894 *aa = 0.0; 895 } 896 aa++; 897 aj++; 898 } 899 ridx++; 900 } 901 } else { /* do not use compressed row format */ 902 m = l->B->rmap->n; 903 for (i=0; i<m; i++) { 904 n = ii[i+1] - ii[i]; 905 aj = aij->j + ii[i]; 906 aa = aij->a + ii[i]; 907 for (j=0; j<n; j++) { 
908 if (PetscAbsScalar(mask[*aj])) { 909 if (b) bb[i] -= *aa*xx[*aj]; 910 *aa = 0.0; 911 } 912 aa++; 913 aj++; 914 } 915 } 916 } 917 if (x) { 918 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 919 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 920 } 921 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 922 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 923 ierr = PetscFree(lrows);CHKERRQ(ierr); 924 925 /* only change matrix nonzero state if pattern was allowed to be changed */ 926 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 927 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 928 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 929 } 930 PetscFunctionReturn(0); 931 } 932 933 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 934 { 935 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 936 PetscErrorCode ierr; 937 PetscInt nt; 938 VecScatter Mvctx = a->Mvctx; 939 940 PetscFunctionBegin; 941 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 942 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 943 if (a->Mvctx_mpi1_flg) { 944 Mvctx = a->Mvctx_mpi1; 945 #if 0 946 MPI_Comm comm; 947 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 948 ierr = VecView(xx,PETSC_VIEWER_STDOUT_(comm));CHKERRQ(ierr);CHKERRQ(ierr); 949 ierr = VecSet(a->lvec,0.0);CHKERRQ(ierr); 950 ierr = VecView(a->lvec,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);CHKERRQ(ierr); 951 #endif 952 } 953 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 954 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 955 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 956 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 957 PetscFunctionReturn(0); 958 } 959 960 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 961 { 962 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 
963 PetscErrorCode ierr; 964 965 PetscFunctionBegin; 966 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 967 PetscFunctionReturn(0); 968 } 969 970 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 971 { 972 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 973 PetscErrorCode ierr; 974 975 PetscFunctionBegin; 976 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 977 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 978 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 979 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 980 PetscFunctionReturn(0); 981 } 982 983 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 984 { 985 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 986 PetscErrorCode ierr; 987 PetscBool merged; 988 989 PetscFunctionBegin; 990 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 991 /* do nondiagonal part */ 992 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 993 if (!merged) { 994 /* send it on its way */ 995 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 996 /* do local part */ 997 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 998 /* receive remote parts: note this assumes the values are not actually */ 999 /* added in yy until the next line, */ 1000 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1001 } else { 1002 /* do local part */ 1003 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1004 /* send it on its way */ 1005 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1006 /* values actually were received in the Begin() but we need to call this nop */ 1007 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1008 } 1009 PetscFunctionReturn(0); 1010 } 1011 1012 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 
1013 { 1014 MPI_Comm comm; 1015 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1016 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1017 IS Me,Notme; 1018 PetscErrorCode ierr; 1019 PetscInt M,N,first,last,*notme,i; 1020 PetscMPIInt size; 1021 1022 PetscFunctionBegin; 1023 /* Easy test: symmetric diagonal block */ 1024 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1025 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1026 if (!*f) PetscFunctionReturn(0); 1027 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1028 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1029 if (size == 1) PetscFunctionReturn(0); 1030 1031 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1032 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1033 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1034 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1035 for (i=0; i<first; i++) notme[i] = i; 1036 for (i=last; i<M; i++) notme[i-last+first] = i; 1037 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1038 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1039 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1040 Aoff = Aoffs[0]; 1041 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1042 Boff = Boffs[0]; 1043 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1044 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1045 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1046 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1047 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1048 ierr = PetscFree(notme);CHKERRQ(ierr); 1049 PetscFunctionReturn(0); 1050 } 1051 1052 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1053 { 1054 PetscErrorCode ierr; 1055 1056 PetscFunctionBegin; 1057 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1058 PetscFunctionReturn(0); 1059 } 1060 1061 
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1062 { 1063 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1064 PetscErrorCode ierr; 1065 1066 PetscFunctionBegin; 1067 /* do nondiagonal part */ 1068 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1069 /* send it on its way */ 1070 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1071 /* do local part */ 1072 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1073 /* receive remote parts */ 1074 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1075 PetscFunctionReturn(0); 1076 } 1077 1078 /* 1079 This only works correctly for square matrices where the subblock A->A is the 1080 diagonal block 1081 */ 1082 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1083 { 1084 PetscErrorCode ierr; 1085 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1086 1087 PetscFunctionBegin; 1088 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1089 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1090 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1091 PetscFunctionReturn(0); 1092 } 1093 1094 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1095 { 1096 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1097 PetscErrorCode ierr; 1098 1099 PetscFunctionBegin; 1100 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1101 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1102 PetscFunctionReturn(0); 1103 } 1104 1105 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1106 { 1107 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1108 PetscErrorCode ierr; 1109 1110 PetscFunctionBegin; 1111 #if defined(PETSC_USE_LOG) 1112 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1113 #endif 1114 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1115 ierr 
= VecDestroy(&aij->diag);CHKERRQ(ierr); 1116 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1117 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1118 #if defined(PETSC_USE_CTABLE) 1119 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1120 #else 1121 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1122 #endif 1123 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1124 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1125 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1126 if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);} 1127 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1128 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1129 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1130 1131 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1132 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1133 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1134 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1135 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1136 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1137 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1138 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1139 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1140 #if defined(PETSC_HAVE_ELEMENTAL) 1141 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1142 #endif 1143 #if defined(PETSC_HAVE_HYPRE) 1144 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1145 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1146 #endif 1147 PetscFunctionReturn(0); 1148 } 1149 1150 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1151 { 1152 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1153 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1154 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1155 PetscErrorCode ierr; 1156 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1157 int fd; 1158 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1159 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1160 PetscScalar *column_values; 1161 PetscInt message_count,flowcontrolcount; 1162 FILE *file; 1163 1164 PetscFunctionBegin; 1165 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1166 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1167 nz = A->nz + B->nz; 1168 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1169 if (!rank) { 1170 header[0] = MAT_FILE_CLASSID; 1171 header[1] = mat->rmap->N; 1172 header[2] = mat->cmap->N; 1173 1174 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1175 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1176 /* get largest number of rows any processor has */ 1177 rlen = mat->rmap->n; 1178 range = mat->rmap->range; 1179 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1180 } else { 1181 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1182 rlen = mat->rmap->n; 1183 } 1184 1185 /* load up the local row counts */ 1186 ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr); 1187 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1188 1189 /* store the row lengths to the file */ 1190 ierr = 
PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1191 if (!rank) { 1192 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1193 for (i=1; i<size; i++) { 1194 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1195 rlen = range[i+1] - range[i]; 1196 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1197 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1198 } 1199 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1200 } else { 1201 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1202 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1203 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1204 } 1205 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1206 1207 /* load up the local column indices */ 1208 nzmax = nz; /* th processor needs space a largest processor needs */ 1209 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1210 ierr = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr); 1211 cnt = 0; 1212 for (i=0; i<mat->rmap->n; i++) { 1213 for (j=B->i[i]; j<B->i[i+1]; j++) { 1214 if ((col = garray[B->j[j]]) > cstart) break; 1215 column_indices[cnt++] = col; 1216 } 1217 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1218 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1219 } 1220 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1221 1222 /* store the column indices to the file */ 1223 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1224 if (!rank) { 1225 MPI_Status status; 1226 ierr = 
PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1227 for (i=1; i<size; i++) { 1228 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1229 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1230 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1231 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1232 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1233 } 1234 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1235 } else { 1236 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1237 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1238 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1239 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1240 } 1241 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1242 1243 /* load up the local column values */ 1244 ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr); 1245 cnt = 0; 1246 for (i=0; i<mat->rmap->n; i++) { 1247 for (j=B->i[i]; j<B->i[i+1]; j++) { 1248 if (garray[B->j[j]] > cstart) break; 1249 column_values[cnt++] = B->a[j]; 1250 } 1251 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1252 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1253 } 1254 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1255 1256 /* store the column values to the file */ 1257 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1258 if (!rank) { 1259 MPI_Status status; 1260 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1261 
for (i=1; i<size; i++) { 1262 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1263 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1264 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1265 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1266 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1267 } 1268 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1269 } else { 1270 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1271 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1272 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1273 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1274 } 1275 ierr = PetscFree(column_values);CHKERRQ(ierr); 1276 1277 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1278 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1279 PetscFunctionReturn(0); 1280 } 1281 1282 #include <petscdraw.h> 1283 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1284 { 1285 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1286 PetscErrorCode ierr; 1287 PetscMPIInt rank = aij->rank,size = aij->size; 1288 PetscBool isdraw,iascii,isbinary; 1289 PetscViewer sviewer; 1290 PetscViewerFormat format; 1291 1292 PetscFunctionBegin; 1293 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1294 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1295 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1296 if (iascii) { 1297 ierr = 
PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1298 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1299 MatInfo info; 1300 PetscBool inodes; 1301 1302 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1303 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1304 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1305 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1306 if (!inodes) { 1307 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1308 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1309 } else { 1310 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1311 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1312 } 1313 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1314 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1315 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1316 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1317 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1318 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1319 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1320 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1321 PetscFunctionReturn(0); 1322 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1323 PetscInt inodecount,inodelimit,*inodes; 1324 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1325 if (inodes) { 1326 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit 
used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1327 } else { 1328 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1329 } 1330 PetscFunctionReturn(0); 1331 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1332 PetscFunctionReturn(0); 1333 } 1334 } else if (isbinary) { 1335 if (size == 1) { 1336 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1337 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1338 } else { 1339 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1340 } 1341 PetscFunctionReturn(0); 1342 } else if (isdraw) { 1343 PetscDraw draw; 1344 PetscBool isnull; 1345 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1346 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1347 if (isnull) PetscFunctionReturn(0); 1348 } 1349 1350 { 1351 /* assemble the entire matrix onto first processor. */ 1352 Mat A; 1353 Mat_SeqAIJ *Aloc; 1354 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1355 MatScalar *a; 1356 1357 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1358 if (!rank) { 1359 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1360 } else { 1361 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1362 } 1363 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1364 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1365 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1366 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1367 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1368 1369 /* copy over the A part */ 1370 Aloc = (Mat_SeqAIJ*)aij->A->data; 1371 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1372 row = mat->rmap->rstart; 1373 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1374 for (i=0; i<m; i++) { 1375 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1376 row++; 1377 
a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1378 } 1379 aj = Aloc->j; 1380 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1381 1382 /* copy over the B part */ 1383 Aloc = (Mat_SeqAIJ*)aij->B->data; 1384 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1385 row = mat->rmap->rstart; 1386 ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr); 1387 ct = cols; 1388 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1389 for (i=0; i<m; i++) { 1390 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1391 row++; 1392 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1393 } 1394 ierr = PetscFree(ct);CHKERRQ(ierr); 1395 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1396 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1397 /* 1398 Everyone has to call to draw the matrix since the graphics waits are 1399 synchronized across all processors that share the PetscDraw object 1400 */ 1401 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1402 if (!rank) { 1403 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1404 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1405 } 1406 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1407 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1408 ierr = MatDestroy(&A);CHKERRQ(ierr); 1409 } 1410 PetscFunctionReturn(0); 1411 } 1412 1413 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1414 { 1415 PetscErrorCode ierr; 1416 PetscBool iascii,isdraw,issocket,isbinary; 1417 1418 PetscFunctionBegin; 1419 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1420 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1421 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1422 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1423 if (iascii || isdraw || isbinary || issocket) { 1424 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1425 } 1426 PetscFunctionReturn(0); 1427 } 1428 1429 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1430 { 1431 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1432 PetscErrorCode ierr; 1433 Vec bb1 = 0; 1434 PetscBool hasop; 1435 1436 PetscFunctionBegin; 1437 if (flag == SOR_APPLY_UPPER) { 1438 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1439 PetscFunctionReturn(0); 1440 } 1441 1442 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1443 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1444 } 1445 1446 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1447 if (flag & SOR_ZERO_INITIAL_GUESS) { 1448 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1449 its--; 1450 } 1451 1452 while (its--) { 1453 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1454 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1455 1456 /* update rhs: bb1 = bb - B*x */ 1457 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1458 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1459 1460 /* local sweep */ 1461 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1462 } 1463 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1464 if (flag & SOR_ZERO_INITIAL_GUESS) { 1465 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1466 its--; 1467 } 1468 while (its--) { 1469 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1470 ierr = 
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1471 1472 /* update rhs: bb1 = bb - B*x */ 1473 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1474 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1475 1476 /* local sweep */ 1477 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1478 } 1479 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1480 if (flag & SOR_ZERO_INITIAL_GUESS) { 1481 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1482 its--; 1483 } 1484 while (its--) { 1485 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1486 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1487 1488 /* update rhs: bb1 = bb - B*x */ 1489 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1490 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1491 1492 /* local sweep */ 1493 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1494 } 1495 } else if (flag & SOR_EISENSTAT) { 1496 Vec xx1; 1497 1498 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1499 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1500 1501 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1502 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1503 if (!mat->diag) { 1504 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1505 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1506 } 1507 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1508 if (hasop) { 1509 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1510 } else { 1511 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1512 } 1513 ierr = 
VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1514 1515 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1516 1517 /* local sweep */ 1518 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1519 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1520 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1521 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1522 1523 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1524 1525 matin->factorerrortype = mat->A->factorerrortype; 1526 PetscFunctionReturn(0); 1527 } 1528 1529 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1530 { 1531 Mat aA,aB,Aperm; 1532 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1533 PetscScalar *aa,*ba; 1534 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1535 PetscSF rowsf,sf; 1536 IS parcolp = NULL; 1537 PetscBool done; 1538 PetscErrorCode ierr; 1539 1540 PetscFunctionBegin; 1541 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1542 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1543 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1544 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1545 1546 /* Invert row permutation to find out where my rows should go */ 1547 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1548 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1549 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1550 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1551 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1552 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1553 1554 /* Invert column permutation to find out where my columns should go */ 1555 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1556 ierr = 
PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1557 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1558 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1559 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1560 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1561 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1562 1563 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1564 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1565 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1566 1567 /* Find out where my gcols should go */ 1568 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1569 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1570 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1571 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1572 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1573 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1574 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1575 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1576 1577 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1578 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1579 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1580 for (i=0; i<m; i++) { 1581 PetscInt row = rdest[i],rowner; 1582 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1583 for (j=ai[i]; j<ai[i+1]; j++) { 1584 PetscInt cowner,col = cdest[aj[j]]; 1585 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1586 if (rowner == cowner) dnnz[i]++; 1587 else onnz[i]++; 1588 } 1589 for (j=bi[i]; j<bi[i+1]; j++) { 1590 PetscInt cowner,col = gcdest[bj[j]]; 1591 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1592 if (rowner == 
cowner) dnnz[i]++; 1593 else onnz[i]++; 1594 } 1595 } 1596 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1597 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1598 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1599 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1600 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1601 1602 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1603 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1604 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1605 for (i=0; i<m; i++) { 1606 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1607 PetscInt j0,rowlen; 1608 rowlen = ai[i+1] - ai[i]; 1609 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1610 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1611 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1612 } 1613 rowlen = bi[i+1] - bi[i]; 1614 for (j0=j=0; j<rowlen; j0=j) { 1615 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1616 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1617 } 1618 } 1619 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1620 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1621 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1622 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1623 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1624 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1625 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1626 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1627 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1628 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 
1629 *B = Aperm; 1630 PetscFunctionReturn(0); 1631 } 1632 1633 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1634 { 1635 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1636 PetscErrorCode ierr; 1637 1638 PetscFunctionBegin; 1639 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1640 if (ghosts) *ghosts = aij->garray; 1641 PetscFunctionReturn(0); 1642 } 1643 1644 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1645 { 1646 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1647 Mat A = mat->A,B = mat->B; 1648 PetscErrorCode ierr; 1649 PetscReal isend[5],irecv[5]; 1650 1651 PetscFunctionBegin; 1652 info->block_size = 1.0; 1653 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1654 1655 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1656 isend[3] = info->memory; isend[4] = info->mallocs; 1657 1658 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1659 1660 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1661 isend[3] += info->memory; isend[4] += info->mallocs; 1662 if (flag == MAT_LOCAL) { 1663 info->nz_used = isend[0]; 1664 info->nz_allocated = isend[1]; 1665 info->nz_unneeded = isend[2]; 1666 info->memory = isend[3]; 1667 info->mallocs = isend[4]; 1668 } else if (flag == MAT_GLOBAL_MAX) { 1669 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1670 1671 info->nz_used = irecv[0]; 1672 info->nz_allocated = irecv[1]; 1673 info->nz_unneeded = irecv[2]; 1674 info->memory = irecv[3]; 1675 info->mallocs = irecv[4]; 1676 } else if (flag == MAT_GLOBAL_SUM) { 1677 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1678 1679 info->nz_used = irecv[0]; 1680 info->nz_allocated = irecv[1]; 1681 info->nz_unneeded = irecv[2]; 1682 info->memory = irecv[3]; 1683 info->mallocs = irecv[4]; 1684 } 1685 info->fill_ratio_given = 0; /* no 
parallel LU/ILU/Cholesky */ 1686 info->fill_ratio_needed = 0; 1687 info->factor_mallocs = 0; 1688 PetscFunctionReturn(0); 1689 } 1690 1691 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1692 { 1693 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1694 PetscErrorCode ierr; 1695 1696 PetscFunctionBegin; 1697 switch (op) { 1698 case MAT_NEW_NONZERO_LOCATIONS: 1699 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1700 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1701 case MAT_KEEP_NONZERO_PATTERN: 1702 case MAT_NEW_NONZERO_LOCATION_ERR: 1703 case MAT_USE_INODES: 1704 case MAT_IGNORE_ZERO_ENTRIES: 1705 MatCheckPreallocated(A,1); 1706 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1707 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1708 break; 1709 case MAT_ROW_ORIENTED: 1710 MatCheckPreallocated(A,1); 1711 a->roworiented = flg; 1712 1713 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1714 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1715 break; 1716 case MAT_NEW_DIAGONALS: 1717 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1718 break; 1719 case MAT_IGNORE_OFF_PROC_ENTRIES: 1720 a->donotstash = flg; 1721 break; 1722 case MAT_SPD: 1723 A->spd_set = PETSC_TRUE; 1724 A->spd = flg; 1725 if (flg) { 1726 A->symmetric = PETSC_TRUE; 1727 A->structurally_symmetric = PETSC_TRUE; 1728 A->symmetric_set = PETSC_TRUE; 1729 A->structurally_symmetric_set = PETSC_TRUE; 1730 } 1731 break; 1732 case MAT_SYMMETRIC: 1733 MatCheckPreallocated(A,1); 1734 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1735 break; 1736 case MAT_STRUCTURALLY_SYMMETRIC: 1737 MatCheckPreallocated(A,1); 1738 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1739 break; 1740 case MAT_HERMITIAN: 1741 MatCheckPreallocated(A,1); 1742 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1743 break; 1744 case MAT_SYMMETRY_ETERNAL: 1745 MatCheckPreallocated(A,1); 1746 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1747 break; 1748 case MAT_SUBMAT_SINGLEIS: 1749 A->submat_singleis = flg; 1750 break; 1751 
case MAT_STRUCTURE_ONLY: 1752 /* The option is handled directly by MatSetOption() */ 1753 break; 1754 default: 1755 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1756 } 1757 PetscFunctionReturn(0); 1758 } 1759 1760 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1761 { 1762 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1763 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1764 PetscErrorCode ierr; 1765 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1766 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1767 PetscInt *cmap,*idx_p; 1768 1769 PetscFunctionBegin; 1770 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1771 mat->getrowactive = PETSC_TRUE; 1772 1773 if (!mat->rowvalues && (idx || v)) { 1774 /* 1775 allocate enough space to hold information from the longest row. 1776 */ 1777 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1778 PetscInt max = 1,tmp; 1779 for (i=0; i<matin->rmap->n; i++) { 1780 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1781 if (max < tmp) max = tmp; 1782 } 1783 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1784 } 1785 1786 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1787 lrow = row - rstart; 1788 1789 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1790 if (!v) {pvA = 0; pvB = 0;} 1791 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1792 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1793 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1794 nztot = nzA + nzB; 1795 1796 cmap = mat->garray; 1797 if (v || idx) { 1798 if (nztot) { 1799 /* Sort by increasing column numbers, assuming A and B already sorted */ 1800 PetscInt imark = -1; 1801 if (v) { 1802 *v = v_p = mat->rowvalues; 1803 for (i=0; i<nzB; i++) { 1804 if 
(cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1805 else break; 1806 } 1807 imark = i; 1808 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1809 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1810 } 1811 if (idx) { 1812 *idx = idx_p = mat->rowindices; 1813 if (imark > -1) { 1814 for (i=0; i<imark; i++) { 1815 idx_p[i] = cmap[cworkB[i]]; 1816 } 1817 } else { 1818 for (i=0; i<nzB; i++) { 1819 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1820 else break; 1821 } 1822 imark = i; 1823 } 1824 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1825 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1826 } 1827 } else { 1828 if (idx) *idx = 0; 1829 if (v) *v = 0; 1830 } 1831 } 1832 *nz = nztot; 1833 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1834 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1835 PetscFunctionReturn(0); 1836 } 1837 1838 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1839 { 1840 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1841 1842 PetscFunctionBegin; 1843 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1844 aij->getrowactive = PETSC_FALSE; 1845 PetscFunctionReturn(0); 1846 } 1847 1848 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1849 { 1850 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1851 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1852 PetscErrorCode ierr; 1853 PetscInt i,j,cstart = mat->cmap->rstart; 1854 PetscReal sum = 0.0; 1855 MatScalar *v; 1856 1857 PetscFunctionBegin; 1858 if (aij->size == 1) { 1859 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1860 } else { 1861 if (type == NORM_FROBENIUS) { 1862 v = amat->a; 1863 for (i=0; i<amat->nz; i++) { 1864 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1865 } 1866 v = bmat->a; 1867 for (i=0; i<bmat->nz; i++) { 1868 sum += 
PetscRealPart(PetscConj(*v)*(*v)); v++; 1869 } 1870 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1871 *norm = PetscSqrtReal(*norm); 1872 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1873 } else if (type == NORM_1) { /* max column norm */ 1874 PetscReal *tmp,*tmp2; 1875 PetscInt *jj,*garray = aij->garray; 1876 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1877 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1878 *norm = 0.0; 1879 v = amat->a; jj = amat->j; 1880 for (j=0; j<amat->nz; j++) { 1881 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1882 } 1883 v = bmat->a; jj = bmat->j; 1884 for (j=0; j<bmat->nz; j++) { 1885 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1886 } 1887 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1888 for (j=0; j<mat->cmap->N; j++) { 1889 if (tmp2[j] > *norm) *norm = tmp2[j]; 1890 } 1891 ierr = PetscFree(tmp);CHKERRQ(ierr); 1892 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1893 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1894 } else if (type == NORM_INFINITY) { /* max row norm */ 1895 PetscReal ntemp = 0.0; 1896 for (j=0; j<aij->A->rmap->n; j++) { 1897 v = amat->a + amat->i[j]; 1898 sum = 0.0; 1899 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1900 sum += PetscAbsScalar(*v); v++; 1901 } 1902 v = bmat->a + bmat->i[j]; 1903 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1904 sum += PetscAbsScalar(*v); v++; 1905 } 1906 if (sum > ntemp) ntemp = sum; 1907 } 1908 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1909 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1910 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1911 } 1912 PetscFunctionReturn(0); 1913 } 1914 1915 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1916 { 1917 Mat_MPIAIJ *a = 
(Mat_MPIAIJ*)A->data; 1918 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1919 PetscErrorCode ierr; 1920 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1921 PetscInt cstart = A->cmap->rstart,ncol; 1922 Mat B; 1923 MatScalar *array; 1924 1925 PetscFunctionBegin; 1926 if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1927 1928 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1929 ai = Aloc->i; aj = Aloc->j; 1930 bi = Bloc->i; bj = Bloc->j; 1931 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1932 PetscInt *d_nnz,*g_nnz,*o_nnz; 1933 PetscSFNode *oloc; 1934 PETSC_UNUSED PetscSF sf; 1935 1936 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1937 /* compute d_nnz for preallocation */ 1938 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1939 for (i=0; i<ai[ma]; i++) { 1940 d_nnz[aj[i]]++; 1941 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1942 } 1943 /* compute local off-diagonal contributions */ 1944 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1945 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1946 /* map those to global */ 1947 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1948 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1949 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1950 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1951 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1952 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1953 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1954 1955 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1956 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1957 ierr = 
MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1958 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1959 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1960 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1961 } else { 1962 B = *matout; 1963 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1964 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1965 } 1966 1967 /* copy over the A part */ 1968 array = Aloc->a; 1969 row = A->rmap->rstart; 1970 for (i=0; i<ma; i++) { 1971 ncol = ai[i+1]-ai[i]; 1972 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1973 row++; 1974 array += ncol; aj += ncol; 1975 } 1976 aj = Aloc->j; 1977 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1978 1979 /* copy over the B part */ 1980 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 1981 array = Bloc->a; 1982 row = A->rmap->rstart; 1983 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1984 cols_tmp = cols; 1985 for (i=0; i<mb; i++) { 1986 ncol = bi[i+1]-bi[i]; 1987 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1988 row++; 1989 array += ncol; cols_tmp += ncol; 1990 } 1991 ierr = PetscFree(cols);CHKERRQ(ierr); 1992 1993 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1994 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1995 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1996 *matout = B; 1997 } else { 1998 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 1999 } 2000 PetscFunctionReturn(0); 2001 } 2002 2003 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2004 { 2005 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2006 Mat a = aij->A,b = aij->B; 2007 PetscErrorCode ierr; 2008 PetscInt s1,s2,s3; 2009 2010 PetscFunctionBegin; 2011 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2012 if (rr) { 2013 ierr = 
VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2014 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2015 /* Overlap communication with computation. */ 2016 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2017 } 2018 if (ll) { 2019 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2020 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2021 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2022 } 2023 /* scale the diagonal block */ 2024 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2025 2026 if (rr) { 2027 /* Do a scatter end and then right scale the off-diagonal block */ 2028 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2029 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2030 } 2031 PetscFunctionReturn(0); 2032 } 2033 2034 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2035 { 2036 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2037 PetscErrorCode ierr; 2038 2039 PetscFunctionBegin; 2040 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2041 PetscFunctionReturn(0); 2042 } 2043 2044 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2045 { 2046 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2047 Mat a,b,c,d; 2048 PetscBool flg; 2049 PetscErrorCode ierr; 2050 2051 PetscFunctionBegin; 2052 a = matA->A; b = matA->B; 2053 c = matB->A; d = matB->B; 2054 2055 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2056 if (flg) { 2057 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2058 } 2059 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2060 PetscFunctionReturn(0); 2061 } 2062 2063 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2064 { 2065 PetscErrorCode ierr; 2066 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2067 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2068 2069 PetscFunctionBegin; 2070 /* If the two matrices don't 
have the same copy implementation, they aren't compatible for fast copy. */ 2071 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2072 /* because of the column compression in the off-processor part of the matrix a->B, 2073 the number of columns in a->B and b->B may be different, hence we cannot call 2074 the MatCopy() directly on the two parts. If need be, we can provide a more 2075 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2076 then copying the submatrices */ 2077 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2078 } else { 2079 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2080 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2081 } 2082 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2083 PetscFunctionReturn(0); 2084 } 2085 2086 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2087 { 2088 PetscErrorCode ierr; 2089 2090 PetscFunctionBegin; 2091 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2092 PetscFunctionReturn(0); 2093 } 2094 2095 /* 2096 Computes the number of nonzeros per row needed for preallocation when X and Y 2097 have different nonzero structure. 
2098 */ 2099 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2100 { 2101 PetscInt i,j,k,nzx,nzy; 2102 2103 PetscFunctionBegin; 2104 /* Set the number of nonzeros in the new matrix */ 2105 for (i=0; i<m; i++) { 2106 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2107 nzx = xi[i+1] - xi[i]; 2108 nzy = yi[i+1] - yi[i]; 2109 nnz[i] = 0; 2110 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2111 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2112 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2113 nnz[i]++; 2114 } 2115 for (; k<nzy; k++) nnz[i]++; 2116 } 2117 PetscFunctionReturn(0); 2118 } 2119 2120 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2121 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2122 { 2123 PetscErrorCode ierr; 2124 PetscInt m = Y->rmap->N; 2125 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2126 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2127 2128 PetscFunctionBegin; 2129 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2130 PetscFunctionReturn(0); 2131 } 2132 2133 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2134 { 2135 PetscErrorCode ierr; 2136 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2137 PetscBLASInt bnz,one=1; 2138 Mat_SeqAIJ *x,*y; 2139 2140 PetscFunctionBegin; 2141 if (str == SAME_NONZERO_PATTERN) { 2142 PetscScalar alpha = a; 2143 x = (Mat_SeqAIJ*)xx->A->data; 2144 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2145 y = (Mat_SeqAIJ*)yy->A->data; 2146 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2147 x = (Mat_SeqAIJ*)xx->B->data; 2148 y = (Mat_SeqAIJ*)yy->B->data; 2149 ierr = 
PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2150 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2151 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2152 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2153 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2154 } else { 2155 Mat B; 2156 PetscInt *nnz_d,*nnz_o; 2157 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2158 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2159 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2160 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2161 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2162 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2163 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2164 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2165 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2166 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2167 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2168 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2169 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2170 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2171 } 2172 PetscFunctionReturn(0); 2173 } 2174 2175 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2176 2177 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2178 { 2179 #if defined(PETSC_USE_COMPLEX) 2180 PetscErrorCode ierr; 2181 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2182 2183 PetscFunctionBegin; 2184 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2185 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2186 #else 2187 PetscFunctionBegin; 2188 #endif 2189 PetscFunctionReturn(0); 2190 } 2191 2192 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2193 { 2194 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2195 PetscErrorCode ierr; 2196 2197 PetscFunctionBegin; 2198 ierr = 
MatRealPart(a->A);CHKERRQ(ierr); 2199 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2200 PetscFunctionReturn(0); 2201 } 2202 2203 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2204 { 2205 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2206 PetscErrorCode ierr; 2207 2208 PetscFunctionBegin; 2209 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2210 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2211 PetscFunctionReturn(0); 2212 } 2213 2214 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2215 { 2216 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2217 PetscErrorCode ierr; 2218 PetscInt i,*idxb = 0; 2219 PetscScalar *va,*vb; 2220 Vec vtmp; 2221 2222 PetscFunctionBegin; 2223 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2224 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2225 if (idx) { 2226 for (i=0; i<A->rmap->n; i++) { 2227 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2228 } 2229 } 2230 2231 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2232 if (idx) { 2233 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2234 } 2235 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2236 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2237 2238 for (i=0; i<A->rmap->n; i++) { 2239 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2240 va[i] = vb[i]; 2241 if (idx) idx[i] = a->garray[idxb[i]]; 2242 } 2243 } 2244 2245 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2246 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2247 ierr = PetscFree(idxb);CHKERRQ(ierr); 2248 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2249 PetscFunctionReturn(0); 2250 } 2251 2252 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2253 { 2254 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2255 PetscErrorCode ierr; 2256 PetscInt i,*idxb = 0; 2257 PetscScalar *va,*vb; 2258 Vec vtmp; 2259 2260 PetscFunctionBegin; 2261 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2262 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2263 if (idx) { 2264 for (i=0; i<A->cmap->n; i++) { 2265 if 
(PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2266 } 2267 } 2268 2269 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2270 if (idx) { 2271 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2272 } 2273 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2274 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2275 2276 for (i=0; i<A->rmap->n; i++) { 2277 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2278 va[i] = vb[i]; 2279 if (idx) idx[i] = a->garray[idxb[i]]; 2280 } 2281 } 2282 2283 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2284 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2285 ierr = PetscFree(idxb);CHKERRQ(ierr); 2286 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2287 PetscFunctionReturn(0); 2288 } 2289 2290 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2291 { 2292 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2293 PetscInt n = A->rmap->n; 2294 PetscInt cstart = A->cmap->rstart; 2295 PetscInt *cmap = mat->garray; 2296 PetscInt *diagIdx, *offdiagIdx; 2297 Vec diagV, offdiagV; 2298 PetscScalar *a, *diagA, *offdiagA; 2299 PetscInt r; 2300 PetscErrorCode ierr; 2301 2302 PetscFunctionBegin; 2303 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2304 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2305 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2306 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2307 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2308 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2309 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2310 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2311 for (r = 0; r < n; ++r) { 2312 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2313 a[r] = diagA[r]; 2314 idx[r] = cstart + diagIdx[r]; 2315 } else { 2316 a[r] = offdiagA[r]; 2317 idx[r] = cmap[offdiagIdx[r]]; 2318 } 2319 } 2320 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2321 ierr = 
VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2322 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2323 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2324 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2325 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2326 PetscFunctionReturn(0); 2327 } 2328 2329 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2330 { 2331 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2332 PetscInt n = A->rmap->n; 2333 PetscInt cstart = A->cmap->rstart; 2334 PetscInt *cmap = mat->garray; 2335 PetscInt *diagIdx, *offdiagIdx; 2336 Vec diagV, offdiagV; 2337 PetscScalar *a, *diagA, *offdiagA; 2338 PetscInt r; 2339 PetscErrorCode ierr; 2340 2341 PetscFunctionBegin; 2342 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2343 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2344 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2345 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2346 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2347 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2348 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2349 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2350 for (r = 0; r < n; ++r) { 2351 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2352 a[r] = diagA[r]; 2353 idx[r] = cstart + diagIdx[r]; 2354 } else { 2355 a[r] = offdiagA[r]; 2356 idx[r] = cmap[offdiagIdx[r]]; 2357 } 2358 } 2359 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2360 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2361 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2362 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2363 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2364 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2365 PetscFunctionReturn(0); 2366 } 2367 2368 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2369 { 2370 PetscErrorCode ierr; 2371 Mat *dummy; 2372 2373 PetscFunctionBegin; 2374 ierr = 
MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2375 *newmat = *dummy; 2376 ierr = PetscFree(dummy);CHKERRQ(ierr); 2377 PetscFunctionReturn(0); 2378 } 2379 2380 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2381 { 2382 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2383 PetscErrorCode ierr; 2384 2385 PetscFunctionBegin; 2386 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2387 A->factorerrortype = a->A->factorerrortype; 2388 PetscFunctionReturn(0); 2389 } 2390 2391 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2392 { 2393 PetscErrorCode ierr; 2394 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2395 2396 PetscFunctionBegin; 2397 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2398 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2399 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2400 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2401 PetscFunctionReturn(0); 2402 } 2403 2404 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2405 { 2406 PetscFunctionBegin; 2407 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2408 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2409 PetscFunctionReturn(0); 2410 } 2411 2412 /*@ 2413 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2414 2415 Collective on Mat 2416 2417 Input Parameters: 2418 + A - the matrix 2419 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2420 2421 Level: advanced 2422 2423 @*/ 2424 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2425 { 2426 PetscErrorCode ierr; 2427 2428 PetscFunctionBegin; 2429 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2430 PetscFunctionReturn(0); 2431 } 2432 2433 PetscErrorCode 
MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2434 { 2435 PetscErrorCode ierr; 2436 PetscBool sc = PETSC_FALSE,flg; 2437 2438 PetscFunctionBegin; 2439 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2440 ierr = PetscObjectOptionsBegin((PetscObject)A); 2441 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2442 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2443 if (flg) { 2444 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2445 } 2446 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2447 PetscFunctionReturn(0); 2448 } 2449 2450 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2451 { 2452 PetscErrorCode ierr; 2453 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2454 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2455 2456 PetscFunctionBegin; 2457 if (!Y->preallocated) { 2458 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2459 } else if (!aij->nz) { 2460 PetscInt nonew = aij->nonew; 2461 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2462 aij->nonew = nonew; 2463 } 2464 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2465 PetscFunctionReturn(0); 2466 } 2467 2468 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2469 { 2470 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2471 PetscErrorCode ierr; 2472 2473 PetscFunctionBegin; 2474 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2475 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2476 if (d) { 2477 PetscInt rstart; 2478 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2479 *d += rstart; 2480 2481 } 2482 PetscFunctionReturn(0); 2483 } 2484 2485 2486 /* -------------------------------------------------------------------*/ 2487 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2488 
MatGetRow_MPIAIJ, 2489 MatRestoreRow_MPIAIJ, 2490 MatMult_MPIAIJ, 2491 /* 4*/ MatMultAdd_MPIAIJ, 2492 MatMultTranspose_MPIAIJ, 2493 MatMultTransposeAdd_MPIAIJ, 2494 0, 2495 0, 2496 0, 2497 /*10*/ 0, 2498 0, 2499 0, 2500 MatSOR_MPIAIJ, 2501 MatTranspose_MPIAIJ, 2502 /*15*/ MatGetInfo_MPIAIJ, 2503 MatEqual_MPIAIJ, 2504 MatGetDiagonal_MPIAIJ, 2505 MatDiagonalScale_MPIAIJ, 2506 MatNorm_MPIAIJ, 2507 /*20*/ MatAssemblyBegin_MPIAIJ, 2508 MatAssemblyEnd_MPIAIJ, 2509 MatSetOption_MPIAIJ, 2510 MatZeroEntries_MPIAIJ, 2511 /*24*/ MatZeroRows_MPIAIJ, 2512 0, 2513 0, 2514 0, 2515 0, 2516 /*29*/ MatSetUp_MPIAIJ, 2517 0, 2518 0, 2519 MatGetDiagonalBlock_MPIAIJ, 2520 0, 2521 /*34*/ MatDuplicate_MPIAIJ, 2522 0, 2523 0, 2524 0, 2525 0, 2526 /*39*/ MatAXPY_MPIAIJ, 2527 MatCreateSubMatrices_MPIAIJ, 2528 MatIncreaseOverlap_MPIAIJ, 2529 MatGetValues_MPIAIJ, 2530 MatCopy_MPIAIJ, 2531 /*44*/ MatGetRowMax_MPIAIJ, 2532 MatScale_MPIAIJ, 2533 MatShift_MPIAIJ, 2534 MatDiagonalSet_MPIAIJ, 2535 MatZeroRowsColumns_MPIAIJ, 2536 /*49*/ MatSetRandom_MPIAIJ, 2537 0, 2538 0, 2539 0, 2540 0, 2541 /*54*/ MatFDColoringCreate_MPIXAIJ, 2542 0, 2543 MatSetUnfactored_MPIAIJ, 2544 MatPermute_MPIAIJ, 2545 0, 2546 /*59*/ MatCreateSubMatrix_MPIAIJ, 2547 MatDestroy_MPIAIJ, 2548 MatView_MPIAIJ, 2549 0, 2550 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2551 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2552 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2553 0, 2554 0, 2555 0, 2556 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2557 MatGetRowMinAbs_MPIAIJ, 2558 0, 2559 0, 2560 0, 2561 0, 2562 /*75*/ MatFDColoringApply_AIJ, 2563 MatSetFromOptions_MPIAIJ, 2564 0, 2565 0, 2566 MatFindZeroDiagonals_MPIAIJ, 2567 /*80*/ 0, 2568 0, 2569 0, 2570 /*83*/ MatLoad_MPIAIJ, 2571 MatIsSymmetric_MPIAIJ, 2572 0, 2573 0, 2574 0, 2575 0, 2576 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2577 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2578 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2579 MatPtAP_MPIAIJ_MPIAIJ, 2580 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2581 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 
2582 0, 2583 0, 2584 0, 2585 0, 2586 /*99*/ 0, 2587 0, 2588 0, 2589 MatConjugate_MPIAIJ, 2590 0, 2591 /*104*/MatSetValuesRow_MPIAIJ, 2592 MatRealPart_MPIAIJ, 2593 MatImaginaryPart_MPIAIJ, 2594 0, 2595 0, 2596 /*109*/0, 2597 0, 2598 MatGetRowMin_MPIAIJ, 2599 0, 2600 MatMissingDiagonal_MPIAIJ, 2601 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2602 0, 2603 MatGetGhosts_MPIAIJ, 2604 0, 2605 0, 2606 /*119*/0, 2607 0, 2608 0, 2609 0, 2610 MatGetMultiProcBlock_MPIAIJ, 2611 /*124*/MatFindNonzeroRows_MPIAIJ, 2612 MatGetColumnNorms_MPIAIJ, 2613 MatInvertBlockDiagonal_MPIAIJ, 2614 0, 2615 MatCreateSubMatricesMPI_MPIAIJ, 2616 /*129*/0, 2617 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2618 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2619 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2620 0, 2621 /*134*/0, 2622 0, 2623 MatRARt_MPIAIJ_MPIAIJ, 2624 0, 2625 0, 2626 /*139*/MatSetBlockSizes_MPIAIJ, 2627 0, 2628 0, 2629 MatFDColoringSetUp_MPIXAIJ, 2630 MatFindOffBlockDiagonalEntries_MPIAIJ, 2631 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2632 }; 2633 2634 /* ----------------------------------------------------------------------------------------*/ 2635 2636 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2637 { 2638 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2639 PetscErrorCode ierr; 2640 2641 PetscFunctionBegin; 2642 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2643 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2644 PetscFunctionReturn(0); 2645 } 2646 2647 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2648 { 2649 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2650 PetscErrorCode ierr; 2651 2652 PetscFunctionBegin; 2653 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2654 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2655 PetscFunctionReturn(0); 2656 } 2657 2658 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2659 { 2660 Mat_MPIAIJ *b; 2661 PetscErrorCode ierr; 2662 2663 PetscFunctionBegin; 2664 ierr = 
PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2665 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2666 b = (Mat_MPIAIJ*)B->data; 2667 2668 #if defined(PETSC_USE_CTABLE) 2669 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2670 #else 2671 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2672 #endif 2673 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2674 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2675 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2676 2677 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2678 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2679 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2680 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2681 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2682 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2683 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2684 2685 if (!B->preallocated) { 2686 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2687 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2688 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2689 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2690 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2691 } 2692 2693 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2694 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2695 B->preallocated = PETSC_TRUE; 2696 B->was_assembled = PETSC_FALSE; 2697 B->assembled = PETSC_FALSE;; 2698 PetscFunctionReturn(0); 2699 } 2700 2701 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2702 { 2703 Mat_MPIAIJ *b; 2704 PetscErrorCode ierr; 2705 2706 PetscFunctionBegin; 2707 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2708 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2709 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2710 b = (Mat_MPIAIJ*)B->data; 2711 2712 #if defined(PETSC_USE_CTABLE) 2713 ierr = 
PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2714 #else 2715 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2716 #endif 2717 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2718 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2719 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2720 2721 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2722 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2723 B->preallocated = PETSC_TRUE; 2724 B->was_assembled = PETSC_FALSE; 2725 B->assembled = PETSC_FALSE; 2726 PetscFunctionReturn(0); 2727 } 2728 2729 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2730 { 2731 Mat mat; 2732 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2733 PetscErrorCode ierr; 2734 2735 PetscFunctionBegin; 2736 *newmat = 0; 2737 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2738 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2739 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2740 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2741 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2742 a = (Mat_MPIAIJ*)mat->data; 2743 2744 mat->factortype = matin->factortype; 2745 mat->assembled = PETSC_TRUE; 2746 mat->insertmode = NOT_SET_VALUES; 2747 mat->preallocated = PETSC_TRUE; 2748 2749 a->size = oldmat->size; 2750 a->rank = oldmat->rank; 2751 a->donotstash = oldmat->donotstash; 2752 a->roworiented = oldmat->roworiented; 2753 a->rowindices = 0; 2754 a->rowvalues = 0; 2755 a->getrowactive = PETSC_FALSE; 2756 2757 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2758 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2759 2760 if (oldmat->colmap) { 2761 #if defined(PETSC_USE_CTABLE) 2762 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2763 #else 2764 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2765 ierr = 
PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2766 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2767 #endif 2768 } else a->colmap = 0; 2769 if (oldmat->garray) { 2770 PetscInt len; 2771 len = oldmat->B->cmap->n; 2772 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2773 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2774 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2775 } else a->garray = 0; 2776 2777 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2778 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2779 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2780 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2781 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2782 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2783 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2784 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2785 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2786 *newmat = mat; 2787 PetscFunctionReturn(0); 2788 } 2789 2790 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2791 { 2792 PetscScalar *vals,*svals; 2793 MPI_Comm comm; 2794 PetscErrorCode ierr; 2795 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2796 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2797 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2798 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2799 PetscInt cend,cstart,n,*rowners; 2800 int fd; 2801 PetscInt bs = newMat->rmap->bs; 2802 2803 PetscFunctionBegin; 2804 /* force binary viewer to load .info file if it has not yet done so */ 2805 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2806 
ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2807 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2808 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2809 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2810 if (!rank) { 2811 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2812 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2813 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2814 } 2815 2816 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2817 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2818 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2819 if (bs < 0) bs = 1; 2820 2821 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2822 M = header[1]; N = header[2]; 2823 2824 /* If global sizes are set, check if they are consistent with that given in the file */ 2825 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2826 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2827 2828 /* determine ownership of all (block) rows */ 2829 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2830 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2831 else m = newMat->rmap->n; /* Set by user */ 2832 2833 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2834 ierr = 
MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2835 2836 /* First process needs enough room for process with most rows */ 2837 if (!rank) { 2838 mmax = rowners[1]; 2839 for (i=2; i<=size; i++) { 2840 mmax = PetscMax(mmax, rowners[i]); 2841 } 2842 } else mmax = -1; /* unused, but compilers complain */ 2843 2844 rowners[0] = 0; 2845 for (i=2; i<=size; i++) { 2846 rowners[i] += rowners[i-1]; 2847 } 2848 rstart = rowners[rank]; 2849 rend = rowners[rank+1]; 2850 2851 /* distribute row lengths to all processors */ 2852 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2853 if (!rank) { 2854 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2855 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2856 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2857 for (j=0; j<m; j++) { 2858 procsnz[0] += ourlens[j]; 2859 } 2860 for (i=1; i<size; i++) { 2861 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2862 /* calculate the number of nonzeros on each processor */ 2863 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2864 procsnz[i] += rowlengths[j]; 2865 } 2866 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2867 } 2868 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2869 } else { 2870 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2871 } 2872 2873 if (!rank) { 2874 /* determine max buffer needed and allocate it */ 2875 maxnz = 0; 2876 for (i=0; i<size; i++) { 2877 maxnz = PetscMax(maxnz,procsnz[i]); 2878 } 2879 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2880 2881 /* read in my part of the matrix column indices */ 2882 nz = procsnz[0]; 2883 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2884 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2885 2886 /* read in every one elses and ship off */ 2887 for (i=1; i<size; i++) { 2888 nz = procsnz[i]; 2889 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2890 ierr = 
MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2891 } 2892 ierr = PetscFree(cols);CHKERRQ(ierr); 2893 } else { 2894 /* determine buffer space needed for message */ 2895 nz = 0; 2896 for (i=0; i<m; i++) { 2897 nz += ourlens[i]; 2898 } 2899 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2900 2901 /* receive message of column indices*/ 2902 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2903 } 2904 2905 /* determine column ownership if matrix is not square */ 2906 if (N != M) { 2907 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 2908 else n = newMat->cmap->n; 2909 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2910 cstart = cend - n; 2911 } else { 2912 cstart = rstart; 2913 cend = rend; 2914 n = cend - cstart; 2915 } 2916 2917 /* loop over local rows, determining number of off diagonal entries */ 2918 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2919 jj = 0; 2920 for (i=0; i<m; i++) { 2921 for (j=0; j<ourlens[i]; j++) { 2922 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2923 jj++; 2924 } 2925 } 2926 2927 for (i=0; i<m; i++) { 2928 ourlens[i] -= offlens[i]; 2929 } 2930 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 2931 2932 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 2933 2934 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 2935 2936 for (i=0; i<m; i++) { 2937 ourlens[i] += offlens[i]; 2938 } 2939 2940 if (!rank) { 2941 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 2942 2943 /* read in my part of the matrix numerical values */ 2944 nz = procsnz[0]; 2945 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2946 2947 /* insert into matrix */ 2948 jj = rstart; 2949 smycols = mycols; 2950 svals = vals; 2951 for (i=0; i<m; i++) { 2952 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2953 smycols += ourlens[i]; 2954 svals += ourlens[i]; 2955 jj++; 2956 } 2957 2958 /* read 
in other processors and ship out */ 2959 for (i=1; i<size; i++) { 2960 nz = procsnz[i]; 2961 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2962 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 2963 } 2964 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2965 } else { 2966 /* receive numeric values */ 2967 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 2968 2969 /* receive message of values*/ 2970 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 2971 2972 /* insert into matrix */ 2973 jj = rstart; 2974 smycols = mycols; 2975 svals = vals; 2976 for (i=0; i<m; i++) { 2977 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2978 smycols += ourlens[i]; 2979 svals += ourlens[i]; 2980 jj++; 2981 } 2982 } 2983 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2984 ierr = PetscFree(vals);CHKERRQ(ierr); 2985 ierr = PetscFree(mycols);CHKERRQ(ierr); 2986 ierr = PetscFree(rowners);CHKERRQ(ierr); 2987 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2988 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2989 PetscFunctionReturn(0); 2990 } 2991 2992 /* Not scalable because of ISAllGather() unless getting all columns. 
*/ 2993 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 2994 { 2995 PetscErrorCode ierr; 2996 IS iscol_local; 2997 PetscBool isstride; 2998 PetscMPIInt lisstride=0,gisstride; 2999 3000 PetscFunctionBegin; 3001 /* check if we are grabbing all columns*/ 3002 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3003 3004 if (isstride) { 3005 PetscInt start,len,mstart,mlen; 3006 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3007 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3008 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3009 if (mstart == start && mlen-mstart == len) lisstride = 1; 3010 } 3011 3012 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3013 if (gisstride) { 3014 PetscInt N; 3015 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3016 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3017 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3018 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3019 } else { 3020 PetscInt cbs; 3021 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3022 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3023 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3024 } 3025 3026 *isseq = iscol_local; 3027 PetscFunctionReturn(0); 3028 } 3029 3030 /* 3031 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3032 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3033 3034 Input Parameters: 3035 mat - matrix 3036 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3037 i.e., mat->rstart <= isrow[i] < mat->rend 3038 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3039 i.e., mat->cstart <= iscol[i] < mat->cend 3040 Output Parameter: 3041 isrow_d,iscol_d - sequential 
row and column index sets for retrieving mat->A 3042 iscol_o - sequential column index set for retrieving mat->B 3043 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3044 */ 3045 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3046 { 3047 PetscErrorCode ierr; 3048 Vec x,cmap; 3049 const PetscInt *is_idx; 3050 PetscScalar *xarray,*cmaparray; 3051 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3052 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3053 Mat B=a->B; 3054 Vec lvec=a->lvec,lcmap; 3055 PetscInt i,cstart,cend,Bn=B->cmap->N; 3056 MPI_Comm comm; 3057 VecScatter Mvctx=a->Mvctx; 3058 3059 PetscFunctionBegin; 3060 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3061 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3062 3063 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3064 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3065 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3066 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3067 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3068 3069 /* Get start indices */ 3070 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3071 isstart -= ncols; 3072 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3073 3074 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3075 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3076 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3077 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3078 for (i=0; i<ncols; i++) { 3079 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3080 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3081 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3082 } 3083 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3084 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3085 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3086 3087 /* Get 
iscol_d */ 3088 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3089 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3090 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3091 3092 /* Get isrow_d */ 3093 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3094 rstart = mat->rmap->rstart; 3095 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3096 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3097 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3098 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3099 3100 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3101 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3102 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3103 3104 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3105 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3106 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3107 3108 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3109 3110 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3111 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3112 3113 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3114 /* off-process column indices */ 3115 count = 0; 3116 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3117 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3118 3119 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3120 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3121 for (i=0; i<Bn; i++) { 3122 if (PetscRealPart(xarray[i]) > -1.0) { 3123 idx[count] = i; /* local column index in off-diagonal part B */ 3124 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3125 count++; 3126 } 3127 } 3128 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3129 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3130 
3131 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3132 /* cannot ensure iscol_o has same blocksize as iscol! */ 3133 3134 ierr = PetscFree(idx);CHKERRQ(ierr); 3135 *garray = cmap1; 3136 3137 ierr = VecDestroy(&x);CHKERRQ(ierr); 3138 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3139 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3140 PetscFunctionReturn(0); 3141 } 3142 3143 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3144 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3145 { 3146 PetscErrorCode ierr; 3147 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3148 Mat M = NULL; 3149 MPI_Comm comm; 3150 IS iscol_d,isrow_d,iscol_o; 3151 Mat Asub = NULL,Bsub = NULL; 3152 PetscInt n; 3153 3154 PetscFunctionBegin; 3155 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3156 3157 if (call == MAT_REUSE_MATRIX) { 3158 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3159 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3160 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3161 3162 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3163 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3164 3165 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3166 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3167 3168 /* Update diagonal and off-diagonal portions of submat */ 3169 asub = (Mat_MPIAIJ*)(*submat)->data; 3170 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3171 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 
3172 if (n) { 3173 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3174 } 3175 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3176 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3177 3178 } else { /* call == MAT_INITIAL_MATRIX) */ 3179 const PetscInt *garray; 3180 PetscInt BsubN; 3181 3182 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */ 3183 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3184 3185 /* Create local submatrices Asub and Bsub */ 3186 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3187 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3188 3189 /* Create submatrix M */ 3190 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3191 3192 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3193 asub = (Mat_MPIAIJ*)M->data; 3194 3195 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3196 n = asub->B->cmap->N; 3197 if (BsubN > n) { 3198 /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */ 3199 const PetscInt *idx; 3200 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3201 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3202 3203 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3204 j = 0; 3205 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3206 for (i=0; i<n; i++) { 3207 if (j >= BsubN) break; 3208 while (subgarray[i] > garray[j]) j++; 3209 3210 if (subgarray[i] == garray[j]) { 3211 idx_new[i] = idx[j++]; 3212 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3213 } 3214 ierr = 
ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3215 3216 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3217 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3218 3219 } else if (BsubN < n) { 3220 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3221 } 3222 3223 ierr = PetscFree(garray);CHKERRQ(ierr); 3224 *submat = M; 3225 3226 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3227 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3228 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3229 3230 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3231 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3232 3233 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3234 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3235 } 3236 PetscFunctionReturn(0); 3237 } 3238 3239 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3240 { 3241 PetscErrorCode ierr; 3242 IS iscol_local=NULL,isrow_d; 3243 PetscInt csize; 3244 PetscInt n,i,j,start,end; 3245 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3246 MPI_Comm comm; 3247 3248 PetscFunctionBegin; 3249 /* If isrow has same processor distribution as mat, 3250 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3251 if (call == MAT_REUSE_MATRIX) { 3252 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3253 if (isrow_d) { 3254 sameRowDist = PETSC_TRUE; 3255 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3256 } else { 3257 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3258 if (iscol_local) { 3259 sameRowDist = PETSC_TRUE; 3260 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3261 } 3262 } 3263 } else { 3264 /* Check if 
isrow has same processor distribution as mat */ 3265 sameDist[0] = PETSC_FALSE; 3266 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3267 if (!n) { 3268 sameDist[0] = PETSC_TRUE; 3269 } else { 3270 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3271 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3272 if (i >= start && j < end) { 3273 sameDist[0] = PETSC_TRUE; 3274 } 3275 } 3276 3277 /* Check if iscol has same processor distribution as mat */ 3278 sameDist[1] = PETSC_FALSE; 3279 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3280 if (!n) { 3281 sameDist[1] = PETSC_TRUE; 3282 } else { 3283 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3284 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3285 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3286 } 3287 3288 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3289 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3290 sameRowDist = tsameDist[0]; 3291 } 3292 3293 if (sameRowDist) { 3294 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3295 /* isrow and iscol have same processor distribution as mat */ 3296 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3297 PetscFunctionReturn(0); 3298 } else { /* sameRowDist */ 3299 /* isrow has same processor distribution as mat */ 3300 if (call == MAT_INITIAL_MATRIX) { 3301 PetscBool sorted; 3302 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3303 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3304 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3305 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3306 3307 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3308 if (sorted) { 3309 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3310 ierr = 
MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3311 PetscFunctionReturn(0); 3312 } 3313 } else { /* call == MAT_REUSE_MATRIX */ 3314 IS iscol_sub; 3315 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3316 if (iscol_sub) { 3317 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3318 PetscFunctionReturn(0); 3319 } 3320 } 3321 } 3322 } 3323 3324 /* General case: iscol -> iscol_local which has global size of iscol */ 3325 if (call == MAT_REUSE_MATRIX) { 3326 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3327 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3328 } else { 3329 if (!iscol_local) { 3330 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3331 } 3332 } 3333 3334 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3335 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3336 3337 if (call == MAT_INITIAL_MATRIX) { 3338 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3339 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3340 } 3341 PetscFunctionReturn(0); 3342 } 3343 3344 /*@C 3345 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3346 and "off-diagonal" part of the matrix in CSR format. 3347 3348 Collective on MPI_Comm 3349 3350 Input Parameters: 3351 + comm - MPI communicator 3352 . A - "diagonal" portion of matrix 3353 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3354 - garray - global index of B columns 3355 3356 Output Parameter: 3357 . 
mat - the matrix, with input A as its local diagonal matrix 3358 Level: advanced 3359 3360 Notes: 3361 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3362 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3363 3364 .seealso: MatCreateMPIAIJWithSplitArrays() 3365 @*/ 3366 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3367 { 3368 PetscErrorCode ierr; 3369 Mat_MPIAIJ *maij; 3370 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3371 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3372 PetscScalar *oa=b->a; 3373 Mat Bnew; 3374 PetscInt m,n,N; 3375 3376 PetscFunctionBegin; 3377 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3378 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3379 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3380 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3381 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3382 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3383 3384 /* Get global columns of mat */ 3385 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3386 3387 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3388 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3389 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3390 maij = (Mat_MPIAIJ*)(*mat)->data; 3391 3392 (*mat)->preallocated = PETSC_TRUE; 3393 3394 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3395 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3396 3397 /* Set A as diagonal portion of *mat */ 3398 maij->A = A; 3399 3400 nz = oi[m]; 3401 for (i=0; i<nz; i++) { 3402 col = oj[i]; 3403 oj[i] 
= garray[col]; 3404 } 3405 3406 /* Set Bnew as off-diagonal portion of *mat */ 3407 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3408 bnew = (Mat_SeqAIJ*)Bnew->data; 3409 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3410 maij->B = Bnew; 3411 3412 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3413 3414 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3415 b->free_a = PETSC_FALSE; 3416 b->free_ij = PETSC_FALSE; 3417 ierr = MatDestroy(&B);CHKERRQ(ierr); 3418 3419 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3420 bnew->free_a = PETSC_TRUE; 3421 bnew->free_ij = PETSC_TRUE; 3422 3423 /* condense columns of maij->B */ 3424 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3425 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3426 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3427 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3428 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3429 PetscFunctionReturn(0); 3430 } 3431 3432 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3433 3434 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3435 { 3436 PetscErrorCode ierr; 3437 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3438 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3439 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3440 Mat M,Msub,B=a->B; 3441 MatScalar *aa; 3442 Mat_SeqAIJ *aij; 3443 PetscInt *garray = a->garray,*colsub,Ncols; 3444 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3445 IS iscol_sub,iscmap; 3446 const PetscInt *is_idx,*cmap; 3447 PetscBool allcolumns=PETSC_FALSE; 3448 MPI_Comm comm; 3449 3450 
PetscFunctionBegin; 3451 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3452 3453 if (call == MAT_REUSE_MATRIX) { 3454 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3455 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3456 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3457 3458 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3459 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3460 3461 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3462 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3463 3464 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3465 3466 } else { /* call == MAT_INITIAL_MATRIX) */ 3467 PetscBool flg; 3468 3469 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3470 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3471 3472 /* (1) iscol -> nonscalable iscol_local */ 3473 /* Check for special case: each processor gets entire matrix columns */ 3474 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3475 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3476 if (allcolumns) { 3477 iscol_sub = iscol_local; 3478 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3479 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3480 3481 } else { 3482 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3483 PetscInt *idx,*cmap1,k; 3484 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3485 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3486 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3487 count = 0; 3488 k = 0; 3489 for (i=0; i<Ncols; i++) { 3490 j = is_idx[i]; 3491 if (j >= cstart && j < cend) { 3492 /* diagonal part of mat */ 3493 idx[count] = j; 3494 cmap1[count++] = i; /* column index in submat */ 3495 } else if (Bn) { 3496 /* off-diagonal part of mat */ 3497 if (j == garray[k]) { 3498 idx[count] = j; 3499 cmap1[count++] = i; /* column index in submat */ 3500 } else if (j > garray[k]) { 3501 while (j > garray[k] && k < Bn-1) k++; 3502 if (j == garray[k]) { 3503 idx[count] = j; 3504 cmap1[count++] = i; /* column index in submat */ 3505 } 3506 } 3507 } 3508 } 3509 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3510 3511 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3512 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3513 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3514 3515 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3516 } 3517 3518 /* (3) Create sequential Msub */ 3519 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3520 } 3521 3522 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3523 aij = (Mat_SeqAIJ*)(Msub)->data; 3524 ii = aij->i; 3525 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3526 3527 /* 3528 m - number of local rows 3529 Ncols - number of columns (same on all processors) 3530 rstart - first row in new global matrix generated 3531 */ 3532 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3533 3534 if (call == MAT_INITIAL_MATRIX) { 3535 /* (4) Create parallel newmat */ 3536 PetscMPIInt rank,size; 3537 PetscInt csize; 3538 3539 
ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3540 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3541 3542 /* 3543 Determine the number of non-zeros in the diagonal and off-diagonal 3544 portions of the matrix in order to do correct preallocation 3545 */ 3546 3547 /* first get start and end of "diagonal" columns */ 3548 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3549 if (csize == PETSC_DECIDE) { 3550 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3551 if (mglobal == Ncols) { /* square matrix */ 3552 nlocal = m; 3553 } else { 3554 nlocal = Ncols/size + ((Ncols % size) > rank); 3555 } 3556 } else { 3557 nlocal = csize; 3558 } 3559 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3560 rstart = rend - nlocal; 3561 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3562 3563 /* next, compute all the lengths */ 3564 jj = aij->j; 3565 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3566 olens = dlens + m; 3567 for (i=0; i<m; i++) { 3568 jend = ii[i+1] - ii[i]; 3569 olen = 0; 3570 dlen = 0; 3571 for (j=0; j<jend; j++) { 3572 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3573 else dlen++; 3574 jj++; 3575 } 3576 olens[i] = olen; 3577 dlens[i] = dlen; 3578 } 3579 3580 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3581 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3582 3583 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3584 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 3585 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3586 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3587 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3588 ierr = PetscFree(dlens);CHKERRQ(ierr); 3589 3590 } else { /* call == MAT_REUSE_MATRIX */ 3591 M = *newmat; 3592 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3593 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be 
same size/layout as request"); 3594 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3595 /* 3596 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3597 rather than the slower MatSetValues(). 3598 */ 3599 M->was_assembled = PETSC_TRUE; 3600 M->assembled = PETSC_FALSE; 3601 } 3602 3603 /* (5) Set values of Msub to *newmat */ 3604 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3605 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3606 3607 jj = aij->j; 3608 aa = aij->a; 3609 for (i=0; i<m; i++) { 3610 row = rstart + i; 3611 nz = ii[i+1] - ii[i]; 3612 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3613 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3614 jj += nz; aa += nz; 3615 } 3616 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3617 3618 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3619 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3620 3621 ierr = PetscFree(colsub);CHKERRQ(ierr); 3622 3623 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3624 if (call == MAT_INITIAL_MATRIX) { 3625 *newmat = M; 3626 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3627 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3628 3629 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3630 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3631 3632 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3633 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3634 3635 if (iscol_local) { 3636 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3637 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3638 } 3639 } 3640 PetscFunctionReturn(0); 3641 } 3642 3643 /* 3644 Not great since it makes two copies of the submatrix, first an SeqAIJ 3645 in local and then by concatenating the local matrices the end result. 
3646 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3647 3648 Note: This requires a sequential iscol with all indices. 3649 */ 3650 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3651 { 3652 PetscErrorCode ierr; 3653 PetscMPIInt rank,size; 3654 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3655 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3656 Mat M,Mreuse; 3657 MatScalar *aa,*vwork; 3658 MPI_Comm comm; 3659 Mat_SeqAIJ *aij; 3660 PetscBool colflag,allcolumns=PETSC_FALSE; 3661 3662 PetscFunctionBegin; 3663 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3664 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3665 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3666 3667 /* Check for special case: each processor gets entire matrix columns */ 3668 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3669 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3670 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3671 3672 if (call == MAT_REUSE_MATRIX) { 3673 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3674 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3675 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3676 } else { 3677 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3678 } 3679 3680 /* 3681 m - number of local rows 3682 n - number of columns (same on all processors) 3683 rstart - first row in new global matrix generated 3684 */ 3685 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3686 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3687 if (call == MAT_INITIAL_MATRIX) { 3688 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3689 ii = aij->i; 3690 jj = aij->j; 3691 3692 /* 3693 
Determine the number of non-zeros in the diagonal and off-diagonal 3694 portions of the matrix in order to do correct preallocation 3695 */ 3696 3697 /* first get start and end of "diagonal" columns */ 3698 if (csize == PETSC_DECIDE) { 3699 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3700 if (mglobal == n) { /* square matrix */ 3701 nlocal = m; 3702 } else { 3703 nlocal = n/size + ((n % size) > rank); 3704 } 3705 } else { 3706 nlocal = csize; 3707 } 3708 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3709 rstart = rend - nlocal; 3710 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3711 3712 /* next, compute all the lengths */ 3713 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3714 olens = dlens + m; 3715 for (i=0; i<m; i++) { 3716 jend = ii[i+1] - ii[i]; 3717 olen = 0; 3718 dlen = 0; 3719 for (j=0; j<jend; j++) { 3720 if (*jj < rstart || *jj >= rend) olen++; 3721 else dlen++; 3722 jj++; 3723 } 3724 olens[i] = olen; 3725 dlens[i] = dlen; 3726 } 3727 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3728 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3729 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3730 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3731 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3732 ierr = PetscFree(dlens);CHKERRQ(ierr); 3733 } else { 3734 PetscInt ml,nl; 3735 3736 M = *newmat; 3737 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3738 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3739 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3740 /* 3741 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3742 rather than the slower MatSetValues(). 
3743 */ 3744 M->was_assembled = PETSC_TRUE; 3745 M->assembled = PETSC_FALSE; 3746 } 3747 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3748 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3749 ii = aij->i; 3750 jj = aij->j; 3751 aa = aij->a; 3752 for (i=0; i<m; i++) { 3753 row = rstart + i; 3754 nz = ii[i+1] - ii[i]; 3755 cwork = jj; jj += nz; 3756 vwork = aa; aa += nz; 3757 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3758 } 3759 3760 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3761 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3762 *newmat = M; 3763 3764 /* save submatrix used in processor for next request */ 3765 if (call == MAT_INITIAL_MATRIX) { 3766 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3767 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3768 } 3769 PetscFunctionReturn(0); 3770 } 3771 3772 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3773 { 3774 PetscInt m,cstart, cend,j,nnz,i,d; 3775 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3776 const PetscInt *JJ; 3777 PetscScalar *values; 3778 PetscErrorCode ierr; 3779 PetscBool nooffprocentries; 3780 3781 PetscFunctionBegin; 3782 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3783 3784 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3785 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3786 m = B->rmap->n; 3787 cstart = B->cmap->rstart; 3788 cend = B->cmap->rend; 3789 rstart = B->rmap->rstart; 3790 3791 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3792 3793 #if defined(PETSC_USE_DEBUGGING) 3794 for (i=0; i<m; i++) { 3795 nnz = Ii[i+1]- Ii[i]; 3796 JJ = J + Ii[i]; 3797 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3798 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative 
column index",i,j); 3799 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3800 } 3801 #endif 3802 3803 for (i=0; i<m; i++) { 3804 nnz = Ii[i+1]- Ii[i]; 3805 JJ = J + Ii[i]; 3806 nnz_max = PetscMax(nnz_max,nnz); 3807 d = 0; 3808 for (j=0; j<nnz; j++) { 3809 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3810 } 3811 d_nnz[i] = d; 3812 o_nnz[i] = nnz - d; 3813 } 3814 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3815 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3816 3817 if (v) values = (PetscScalar*)v; 3818 else { 3819 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3820 } 3821 3822 for (i=0; i<m; i++) { 3823 ii = i + rstart; 3824 nnz = Ii[i+1]- Ii[i]; 3825 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3826 } 3827 nooffprocentries = B->nooffprocentries; 3828 B->nooffprocentries = PETSC_TRUE; 3829 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3830 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3831 B->nooffprocentries = nooffprocentries; 3832 3833 if (!v) { 3834 ierr = PetscFree(values);CHKERRQ(ierr); 3835 } 3836 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3837 PetscFunctionReturn(0); 3838 } 3839 3840 /*@ 3841 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3842 (the default parallel PETSc format). 3843 3844 Collective on MPI_Comm 3845 3846 Input Parameters: 3847 + B - the matrix 3848 . i - the indices into j for the start of each local row (starts with zero) 3849 . 
j - the column indices for each local row (starts with zero) 3850 - v - optional values in the matrix 3851 3852 Level: developer 3853 3854 Notes: 3855 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3856 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3857 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3858 3859 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3860 3861 The format which is used for the sparse matrix input, is equivalent to a 3862 row-major ordering.. i.e for the following matrix, the input data expected is 3863 as shown 3864 3865 $ 1 0 0 3866 $ 2 0 3 P0 3867 $ ------- 3868 $ 4 5 6 P1 3869 $ 3870 $ Process0 [P0]: rows_owned=[0,1] 3871 $ i = {0,1,3} [size = nrow+1 = 2+1] 3872 $ j = {0,0,2} [size = 3] 3873 $ v = {1,2,3} [size = 3] 3874 $ 3875 $ Process1 [P1]: rows_owned=[2] 3876 $ i = {0,3} [size = nrow+1 = 1+1] 3877 $ j = {0,1,2} [size = 3] 3878 $ v = {4,5,6} [size = 3] 3879 3880 .keywords: matrix, aij, compressed row, sparse, parallel 3881 3882 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3883 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3884 @*/ 3885 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3886 { 3887 PetscErrorCode ierr; 3888 3889 PetscFunctionBegin; 3890 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3891 PetscFunctionReturn(0); 3892 } 3893 3894 /*@C 3895 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3896 (the default parallel PETSc format). 
For good matrix assembly performance 3897 the user should preallocate the matrix storage by setting the parameters 3898 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3899 performance can be increased by more than a factor of 50. 3900 3901 Collective on MPI_Comm 3902 3903 Input Parameters: 3904 + B - the matrix 3905 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3906 (same value is used for all local rows) 3907 . d_nnz - array containing the number of nonzeros in the various rows of the 3908 DIAGONAL portion of the local submatrix (possibly different for each row) 3909 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3910 The size of this array is equal to the number of local rows, i.e 'm'. 3911 For matrices that will be factored, you must leave room for (and set) 3912 the diagonal entry even if it is zero. 3913 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3914 submatrix (same value is used for all local rows). 3915 - o_nnz - array containing the number of nonzeros in the various rows of the 3916 OFF-DIAGONAL portion of the local submatrix (possibly different for 3917 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3918 structure. The size of this array is equal to the number 3919 of local rows, i.e 'm'. 3920 3921 If the *_nnz parameter is given then the *_nz parameter is ignored 3922 3923 The AIJ format (also called the Yale sparse matrix format or 3924 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3925 storage. The stored row and column indices begin with zero. 3926 See Users-Manual: ch_mat for details. 3927 3928 The parallel matrix is partitioned such that the first m0 rows belong to 3929 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3930 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.
4017 4018 Level: intermediate 4019 4020 .keywords: matrix, aij, compressed row, sparse, parallel 4021 4022 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4023 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 4024 @*/ 4025 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4026 { 4027 PetscErrorCode ierr; 4028 4029 PetscFunctionBegin; 4030 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4031 PetscValidType(B,1); 4032 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4033 PetscFunctionReturn(0); 4034 } 4035 4036 /*@ 4037 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 4038 CSR format the local rows. 4039 4040 Collective on MPI_Comm 4041 4042 Input Parameters: 4043 + comm - MPI communicator 4044 . m - number of local rows (Cannot be PETSC_DECIDE) 4045 . n - This value should be the same as the local size used in creating the 4046 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4047 calculated if N is given) For square matrices n is almost always m. 4048 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4049 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4050 . i - row indices 4051 . j - column indices 4052 - a - matrix values 4053 4054 Output Parameter: 4055 . mat - the matrix 4056 4057 Level: intermediate 4058 4059 Notes: 4060 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4061 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4062 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4063 4064 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 
4065 4066 The format which is used for the sparse matrix input, is equivalent to a 4067 row-major ordering.. i.e for the following matrix, the input data expected is 4068 as shown 4069 4070 $ 1 0 0 4071 $ 2 0 3 P0 4072 $ ------- 4073 $ 4 5 6 P1 4074 $ 4075 $ Process0 [P0]: rows_owned=[0,1] 4076 $ i = {0,1,3} [size = nrow+1 = 2+1] 4077 $ j = {0,0,2} [size = 3] 4078 $ v = {1,2,3} [size = 3] 4079 $ 4080 $ Process1 [P1]: rows_owned=[2] 4081 $ i = {0,3} [size = nrow+1 = 1+1] 4082 $ j = {0,1,2} [size = 3] 4083 $ v = {4,5,6} [size = 3] 4084 4085 .keywords: matrix, aij, compressed row, sparse, parallel 4086 4087 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4088 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4089 @*/ 4090 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4091 { 4092 PetscErrorCode ierr; 4093 4094 PetscFunctionBegin; 4095 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4096 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4097 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4098 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4099 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4100 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4101 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4102 PetscFunctionReturn(0); 4103 } 4104 4105 /*@C 4106 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4107 (the default parallel PETSc format). For good matrix assembly performance 4108 the user should preallocate the matrix storage by setting the parameters 4109 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4110 performance can be increased by more than a factor of 50. 
4111 4112 Collective on MPI_Comm 4113 4114 Input Parameters: 4115 + comm - MPI communicator 4116 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4117 This value should be the same as the local size used in creating the 4118 y vector for the matrix-vector product y = Ax. 4119 . n - This value should be the same as the local size used in creating the 4120 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4121 calculated if N is given) For square matrices n is almost always m. 4122 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4123 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4124 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4125 (same value is used for all local rows) 4126 . d_nnz - array containing the number of nonzeros in the various rows of the 4127 DIAGONAL portion of the local submatrix (possibly different for each row) 4128 or NULL, if d_nz is used to specify the nonzero structure. 4129 The size of this array is equal to the number of local rows, i.e 'm'. 4130 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4131 submatrix (same value is used for all local rows). 4132 - o_nnz - array containing the number of nonzeros in the various rows of the 4133 OFF-DIAGONAL portion of the local submatrix (possibly different for 4134 each row) or NULL, if o_nz is used to specify the nonzero 4135 structure. The size of this array is equal to the number 4136 of local rows, i.e 'm'. 4137 4138 Output Parameter: 4139 . A - the matrix 4140 4141 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4142 MatXXXXSetPreallocation() paradgm instead of this routine directly. 
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2 etc.. where
   m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
   values corresponding to [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to 0th partition, the next n1 columns belonging to the next
   partition etc.. where n0,n1,n2... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor. i.e diagonal matrix on
   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism
.vb
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
.ve

$     MatCreate(...,&A);
$     MatSetType(A,MATMPIAIJ);
$     MatSetSizes(A, m,n,M,N);
$     MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!


   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and  o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and  o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and  o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.
4276 4277 Level: intermediate 4278 4279 .keywords: matrix, aij, compressed row, sparse, parallel 4280 4281 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4282 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4283 @*/ 4284 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4285 { 4286 PetscErrorCode ierr; 4287 PetscMPIInt size; 4288 4289 PetscFunctionBegin; 4290 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4291 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4292 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4293 if (size > 1) { 4294 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4295 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4296 } else { 4297 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4298 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4299 } 4300 PetscFunctionReturn(0); 4301 } 4302 4303 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4304 { 4305 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4306 PetscBool flg; 4307 PetscErrorCode ierr; 4308 4309 PetscFunctionBegin; 4310 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr); 4311 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4312 if (Ad) *Ad = a->A; 4313 if (Ao) *Ao = a->B; 4314 if (colmap) *colmap = a->garray; 4315 PetscFunctionReturn(0); 4316 } 4317 4318 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4319 { 4320 PetscErrorCode ierr; 4321 PetscInt m,N,i,rstart,nnz,Ii; 4322 PetscInt *indx; 4323 PetscScalar *values; 4324 4325 PetscFunctionBegin; 4326 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4327 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4328 PetscInt *dnz,*onz,sum,bs,cbs; 4329 4330 if (n 
== PETSC_DECIDE) { 4331 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4332 } 4333 /* Check sum(n) = N */ 4334 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4335 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4336 4337 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4338 rstart -= m; 4339 4340 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4341 for (i=0; i<m; i++) { 4342 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4343 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4344 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4345 } 4346 4347 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4348 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4349 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4350 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4351 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4352 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4353 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4354 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4355 } 4356 4357 /* numeric phase */ 4358 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4359 for (i=0; i<m; i++) { 4360 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4361 Ii = i + rstart; 4362 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4363 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4364 } 4365 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4366 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4367 PetscFunctionReturn(0); 4368 } 4369 4370 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4371 { 4372 PetscErrorCode ierr; 4373 PetscMPIInt rank; 4374 PetscInt m,N,i,rstart,nnz; 4375 size_t len; 4376 const PetscInt 
*indx; 4377 PetscViewer out; 4378 char *name; 4379 Mat B; 4380 const PetscScalar *values; 4381 4382 PetscFunctionBegin; 4383 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4384 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4385 /* Should this be the type of the diagonal block of A? */ 4386 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4387 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4388 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4389 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4390 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4391 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4392 for (i=0; i<m; i++) { 4393 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4394 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4395 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4396 } 4397 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4398 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4399 4400 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4401 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4402 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4403 sprintf(name,"%s.%d",outfile,rank); 4404 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4405 ierr = PetscFree(name);CHKERRQ(ierr); 4406 ierr = MatView(B,out);CHKERRQ(ierr); 4407 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4408 ierr = MatDestroy(&B);CHKERRQ(ierr); 4409 PetscFunctionReturn(0); 4410 } 4411 4412 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4413 { 4414 PetscErrorCode ierr; 4415 Mat_Merge_SeqsToMPI *merge; 4416 PetscContainer container; 4417 4418 PetscFunctionBegin; 4419 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4420 if (container) { 4421 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4422 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4423 ierr = 
PetscFree(merge->len_s);CHKERRQ(ierr); 4424 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4425 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4426 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4427 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4428 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4429 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4430 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4431 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4432 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4433 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4434 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4435 ierr = PetscFree(merge);CHKERRQ(ierr); 4436 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4437 } 4438 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4439 PetscFunctionReturn(0); 4440 } 4441 4442 #include <../src/mat/utils/freespace.h> 4443 #include <petscbt.h> 4444 4445 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4446 { 4447 PetscErrorCode ierr; 4448 MPI_Comm comm; 4449 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4450 PetscMPIInt size,rank,taga,*len_s; 4451 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4452 PetscInt proc,m; 4453 PetscInt **buf_ri,**buf_rj; 4454 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4455 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4456 MPI_Request *s_waits,*r_waits; 4457 MPI_Status *status; 4458 MatScalar *aa=a->a; 4459 MatScalar **abuf_r,*ba_i; 4460 Mat_Merge_SeqsToMPI *merge; 4461 PetscContainer container; 4462 4463 PetscFunctionBegin; 4464 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4465 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4466 4467 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4468 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4469 4470 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4471 ierr = 
PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4472 4473 bi = merge->bi; 4474 bj = merge->bj; 4475 buf_ri = merge->buf_ri; 4476 buf_rj = merge->buf_rj; 4477 4478 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4479 owners = merge->rowmap->range; 4480 len_s = merge->len_s; 4481 4482 /* send and recv matrix values */ 4483 /*-----------------------------*/ 4484 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4485 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4486 4487 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4488 for (proc=0,k=0; proc<size; proc++) { 4489 if (!len_s[proc]) continue; 4490 i = owners[proc]; 4491 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4492 k++; 4493 } 4494 4495 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4496 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4497 ierr = PetscFree(status);CHKERRQ(ierr); 4498 4499 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4500 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4501 4502 /* insert mat values of mpimat */ 4503 /*----------------------------*/ 4504 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4505 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4506 4507 for (k=0; k<merge->nrecv; k++) { 4508 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4509 nrows = *(buf_ri_k[k]); 4510 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4511 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4512 } 4513 4514 /* set values of ba */ 4515 m = merge->rowmap->n; 4516 for (i=0; i<m; i++) { 4517 arow = owners[rank] + i; 4518 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4519 bnzi = bi[i+1] - bi[i]; 4520 ierr = 
PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4521 4522 /* add local non-zero vals of this proc's seqmat into ba */ 4523 anzi = ai[arow+1] - ai[arow]; 4524 aj = a->j + ai[arow]; 4525 aa = a->a + ai[arow]; 4526 nextaj = 0; 4527 for (j=0; nextaj<anzi; j++) { 4528 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4529 ba_i[j] += aa[nextaj++]; 4530 } 4531 } 4532 4533 /* add received vals into ba */ 4534 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4535 /* i-th row */ 4536 if (i == *nextrow[k]) { 4537 anzi = *(nextai[k]+1) - *nextai[k]; 4538 aj = buf_rj[k] + *(nextai[k]); 4539 aa = abuf_r[k] + *(nextai[k]); 4540 nextaj = 0; 4541 for (j=0; nextaj<anzi; j++) { 4542 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4543 ba_i[j] += aa[nextaj++]; 4544 } 4545 } 4546 nextrow[k]++; nextai[k]++; 4547 } 4548 } 4549 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4550 } 4551 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4552 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4553 4554 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4555 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4556 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4557 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4558 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4559 PetscFunctionReturn(0); 4560 } 4561 4562 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4563 { 4564 PetscErrorCode ierr; 4565 Mat B_mpi; 4566 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4567 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4568 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4569 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4570 PetscInt len,proc,*dnz,*onz,bs,cbs; 4571 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4572 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4573 MPI_Request 
*si_waits,*sj_waits,*ri_waits,*rj_waits; 4574 MPI_Status *status; 4575 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4576 PetscBT lnkbt; 4577 Mat_Merge_SeqsToMPI *merge; 4578 PetscContainer container; 4579 4580 PetscFunctionBegin; 4581 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4582 4583 /* make sure it is a PETSc comm */ 4584 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4585 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4586 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4587 4588 ierr = PetscNew(&merge);CHKERRQ(ierr); 4589 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4590 4591 /* determine row ownership */ 4592 /*---------------------------------------------------------*/ 4593 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4594 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4595 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4596 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4597 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4598 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4599 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4600 4601 m = merge->rowmap->n; 4602 owners = merge->rowmap->range; 4603 4604 /* determine the number of messages to send, their lengths */ 4605 /*---------------------------------------------------------*/ 4606 len_s = merge->len_s; 4607 4608 len = 0; /* length of buf_si[] */ 4609 merge->nsend = 0; 4610 for (proc=0; proc<size; proc++) { 4611 len_si[proc] = 0; 4612 if (proc == rank) { 4613 len_s[proc] = 0; 4614 } else { 4615 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4616 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4617 } 4618 if (len_s[proc]) { 4619 merge->nsend++; 4620 nrows = 0; 4621 for (i=owners[proc]; i<owners[proc+1]; i++) { 4622 if (ai[i+1] > ai[i]) nrows++; 4623 } 4624 len_si[proc] = 2*(nrows+1); 4625 len += len_si[proc]; 4626 } 4627 } 4628 4629 
/* determine the number and length of messages to receive for ij-structure */ 4630 /*-------------------------------------------------------------------------*/ 4631 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4632 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4633 4634 /* post the Irecv of j-structure */ 4635 /*-------------------------------*/ 4636 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4637 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4638 4639 /* post the Isend of j-structure */ 4640 /*--------------------------------*/ 4641 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4642 4643 for (proc=0, k=0; proc<size; proc++) { 4644 if (!len_s[proc]) continue; 4645 i = owners[proc]; 4646 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4647 k++; 4648 } 4649 4650 /* receives and sends of j-structure are complete */ 4651 /*------------------------------------------------*/ 4652 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4653 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4654 4655 /* send and recv i-structure */ 4656 /*---------------------------*/ 4657 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4658 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4659 4660 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4661 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4662 for (proc=0,k=0; proc<size; proc++) { 4663 if (!len_s[proc]) continue; 4664 /* form outgoing message for i-structure: 4665 buf_si[0]: nrows to be sent 4666 [1:nrows]: row index (global) 4667 [nrows+1:2*nrows+1]: i-structure index 4668 */ 4669 /*-------------------------------------------*/ 
4670 nrows = len_si[proc]/2 - 1; 4671 buf_si_i = buf_si + nrows+1; 4672 buf_si[0] = nrows; 4673 buf_si_i[0] = 0; 4674 nrows = 0; 4675 for (i=owners[proc]; i<owners[proc+1]; i++) { 4676 anzi = ai[i+1] - ai[i]; 4677 if (anzi) { 4678 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4679 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4680 nrows++; 4681 } 4682 } 4683 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4684 k++; 4685 buf_si += len_si[proc]; 4686 } 4687 4688 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4689 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4690 4691 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4692 for (i=0; i<merge->nrecv; i++) { 4693 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4694 } 4695 4696 ierr = PetscFree(len_si);CHKERRQ(ierr); 4697 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4698 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4699 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4700 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4701 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4702 ierr = PetscFree(status);CHKERRQ(ierr); 4703 4704 /* compute a local seq matrix in each processor */ 4705 /*----------------------------------------------*/ 4706 /* allocate bi array and free space for accumulating nonzero column info */ 4707 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4708 bi[0] = 0; 4709 4710 /* create and initialize a linked list */ 4711 nlnk = N+1; 4712 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4713 4714 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4715 len = ai[owners[rank+1]] - ai[owners[rank]]; 4716 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4717 4718 current_space = free_space; 4719 4720 /* determine symbolic info for each 
local row */ 4721 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4722 4723 for (k=0; k<merge->nrecv; k++) { 4724 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4725 nrows = *buf_ri_k[k]; 4726 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4727 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4728 } 4729 4730 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4731 len = 0; 4732 for (i=0; i<m; i++) { 4733 bnzi = 0; 4734 /* add local non-zero cols of this proc's seqmat into lnk */ 4735 arow = owners[rank] + i; 4736 anzi = ai[arow+1] - ai[arow]; 4737 aj = a->j + ai[arow]; 4738 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4739 bnzi += nlnk; 4740 /* add received col data into lnk */ 4741 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4742 if (i == *nextrow[k]) { /* i-th row */ 4743 anzi = *(nextai[k]+1) - *nextai[k]; 4744 aj = buf_rj[k] + *nextai[k]; 4745 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4746 bnzi += nlnk; 4747 nextrow[k]++; nextai[k]++; 4748 } 4749 } 4750 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4751 4752 /* if free space is not available, make more free space */ 4753 if (current_space->local_remaining<bnzi) { 4754 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4755 nspacedouble++; 4756 } 4757 /* copy data into free space, then initialize lnk */ 4758 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4759 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4760 4761 current_space->array += bnzi; 4762 current_space->local_used += bnzi; 4763 current_space->local_remaining -= bnzi; 4764 4765 bi[i+1] = bi[i] + bnzi; 4766 } 4767 4768 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4769 4770 ierr = 
PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4771 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4772 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4773 4774 /* create symbolic parallel matrix B_mpi */ 4775 /*---------------------------------------*/ 4776 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4777 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4778 if (n==PETSC_DECIDE) { 4779 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4780 } else { 4781 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4782 } 4783 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4784 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4785 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4786 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4787 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4788 4789 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4790 B_mpi->assembled = PETSC_FALSE; 4791 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4792 merge->bi = bi; 4793 merge->bj = bj; 4794 merge->buf_ri = buf_ri; 4795 merge->buf_rj = buf_rj; 4796 merge->coi = NULL; 4797 merge->coj = NULL; 4798 merge->owners_co = NULL; 4799 4800 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4801 4802 /* attach the supporting struct to B_mpi for reuse */ 4803 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4804 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4805 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4806 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4807 *mpimat = B_mpi; 4808 4809 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4810 PetscFunctionReturn(0); 4811 } 4812 4813 /*@C 4814 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4815 matrices from each processor 4816 4817 
Collective on MPI_Comm 4818 4819 Input Parameters: 4820 + comm - the communicators the parallel matrix will live on 4821 . seqmat - the input sequential matrices 4822 . m - number of local rows (or PETSC_DECIDE) 4823 . n - number of local columns (or PETSC_DECIDE) 4824 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4825 4826 Output Parameter: 4827 . mpimat - the parallel matrix generated 4828 4829 Level: advanced 4830 4831 Notes: 4832 The dimensions of the sequential matrix in each processor MUST be the same. 4833 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4834 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4835 @*/ 4836 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4837 { 4838 PetscErrorCode ierr; 4839 PetscMPIInt size; 4840 4841 PetscFunctionBegin; 4842 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4843 if (size == 1) { 4844 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4845 if (scall == MAT_INITIAL_MATRIX) { 4846 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4847 } else { 4848 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4849 } 4850 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4851 PetscFunctionReturn(0); 4852 } 4853 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4854 if (scall == MAT_INITIAL_MATRIX) { 4855 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4856 } 4857 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4858 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4859 PetscFunctionReturn(0); 4860 } 4861 4862 /*@ 4863 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4864 mlocal rows and n columns. 
Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4865 with MatGetSize() 4866 4867 Not Collective 4868 4869 Input Parameters: 4870 + A - the matrix 4871 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4872 4873 Output Parameter: 4874 . A_loc - the local sequential matrix generated 4875 4876 Level: developer 4877 4878 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4879 4880 @*/ 4881 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4882 { 4883 PetscErrorCode ierr; 4884 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4885 Mat_SeqAIJ *mat,*a,*b; 4886 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4887 MatScalar *aa,*ba,*cam; 4888 PetscScalar *ca; 4889 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4890 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4891 PetscBool match; 4892 MPI_Comm comm; 4893 PetscMPIInt size; 4894 4895 PetscFunctionBegin; 4896 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4897 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4898 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4899 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4900 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 4901 4902 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4903 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4904 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4905 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4906 aa = a->a; ba = b->a; 4907 if (scall == MAT_INITIAL_MATRIX) { 4908 if (size == 1) { 4909 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 4910 PetscFunctionReturn(0); 4911 } 4912 4913 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 4914 ci[0] = 0; 4915 for (i=0; i<am; i++) { 4916 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4917 } 4918 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 4919 ierr = 
PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 4920 k = 0; 4921 for (i=0; i<am; i++) { 4922 ncols_o = bi[i+1] - bi[i]; 4923 ncols_d = ai[i+1] - ai[i]; 4924 /* off-diagonal portion of A */ 4925 for (jo=0; jo<ncols_o; jo++) { 4926 col = cmap[*bj]; 4927 if (col >= cstart) break; 4928 cj[k] = col; bj++; 4929 ca[k++] = *ba++; 4930 } 4931 /* diagonal portion of A */ 4932 for (j=0; j<ncols_d; j++) { 4933 cj[k] = cstart + *aj++; 4934 ca[k++] = *aa++; 4935 } 4936 /* off-diagonal portion of A */ 4937 for (j=jo; j<ncols_o; j++) { 4938 cj[k] = cmap[*bj++]; 4939 ca[k++] = *ba++; 4940 } 4941 } 4942 /* put together the new matrix */ 4943 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4944 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4945 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4946 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4947 mat->free_a = PETSC_TRUE; 4948 mat->free_ij = PETSC_TRUE; 4949 mat->nonew = 0; 4950 } else if (scall == MAT_REUSE_MATRIX) { 4951 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4952 ci = mat->i; cj = mat->j; cam = mat->a; 4953 for (i=0; i<am; i++) { 4954 /* off-diagonal portion of A */ 4955 ncols_o = bi[i+1] - bi[i]; 4956 for (jo=0; jo<ncols_o; jo++) { 4957 col = cmap[*bj]; 4958 if (col >= cstart) break; 4959 *cam++ = *ba++; bj++; 4960 } 4961 /* diagonal portion of A */ 4962 ncols_d = ai[i+1] - ai[i]; 4963 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4964 /* off-diagonal portion of A */ 4965 for (j=jo; j<ncols_o; j++) { 4966 *cam++ = *ba++; bj++; 4967 } 4968 } 4969 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4970 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4971 PetscFunctionReturn(0); 4972 } 4973 4974 /*@C 4975 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 4976 4977 Not Collective 4978 4979 Input Parameters: 4980 + A - 
the matrix 4981 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4982 - row, col - index sets of rows and columns to extract (or NULL) 4983 4984 Output Parameter: 4985 . A_loc - the local sequential matrix generated 4986 4987 Level: developer 4988 4989 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 4990 4991 @*/ 4992 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4993 { 4994 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4995 PetscErrorCode ierr; 4996 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4997 IS isrowa,iscola; 4998 Mat *aloc; 4999 PetscBool match; 5000 5001 PetscFunctionBegin; 5002 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5003 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5004 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5005 if (!row) { 5006 start = A->rmap->rstart; end = A->rmap->rend; 5007 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5008 } else { 5009 isrowa = *row; 5010 } 5011 if (!col) { 5012 start = A->cmap->rstart; 5013 cmap = a->garray; 5014 nzA = a->A->cmap->n; 5015 nzB = a->B->cmap->n; 5016 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5017 ncols = 0; 5018 for (i=0; i<nzB; i++) { 5019 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5020 else break; 5021 } 5022 imark = i; 5023 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5024 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5025 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5026 } else { 5027 iscola = *col; 5028 } 5029 if (scall != MAT_INITIAL_MATRIX) { 5030 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5031 aloc[0] = *A_loc; 5032 } 5033 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5034 *A_loc = aloc[0]; 5035 ierr = PetscFree(aloc);CHKERRQ(ierr); 5036 if (!row) { 5037 ierr = 
ISDestroy(&isrowa);CHKERRQ(ierr); 5038 } 5039 if (!col) { 5040 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5041 } 5042 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5043 PetscFunctionReturn(0); 5044 } 5045 5046 /*@C 5047 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5048 5049 Collective on Mat 5050 5051 Input Parameters: 5052 + A,B - the matrices in mpiaij format 5053 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5054 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5055 5056 Output Parameter: 5057 + rowb, colb - index sets of rows and columns of B to extract 5058 - B_seq - the sequential matrix generated 5059 5060 Level: developer 5061 5062 @*/ 5063 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5064 { 5065 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5066 PetscErrorCode ierr; 5067 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5068 IS isrowb,iscolb; 5069 Mat *bseq=NULL; 5070 5071 PetscFunctionBegin; 5072 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5073 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5074 } 5075 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5076 5077 if (scall == MAT_INITIAL_MATRIX) { 5078 start = A->cmap->rstart; 5079 cmap = a->garray; 5080 nzA = a->A->cmap->n; 5081 nzB = a->B->cmap->n; 5082 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5083 ncols = 0; 5084 for (i=0; i<nzB; i++) { /* row < local row index */ 5085 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5086 else break; 5087 } 5088 imark = i; 5089 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5090 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5091 ierr = 
ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5092 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5093 } else { 5094 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5095 isrowb = *rowb; iscolb = *colb; 5096 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5097 bseq[0] = *B_seq; 5098 } 5099 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5100 *B_seq = bseq[0]; 5101 ierr = PetscFree(bseq);CHKERRQ(ierr); 5102 if (!rowb) { 5103 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5104 } else { 5105 *rowb = isrowb; 5106 } 5107 if (!colb) { 5108 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5109 } else { 5110 *colb = iscolb; 5111 } 5112 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5113 PetscFunctionReturn(0); 5114 } 5115 5116 /* 5117 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5118 of the OFF-DIAGONAL portion of local A 5119 5120 Collective on Mat 5121 5122 Input Parameters: 5123 + A,B - the matrices in mpiaij format 5124 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5125 5126 Output Parameter: 5127 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5128 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5129 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
-    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

   Notes:
   If any of startsj_s, startsj_r or bufa_ptr is NULL the communication buffers cannot be handed
   back to the caller, so the routine always performs the MAT_INITIAL_MATRIX path and frees the
   buffers before returning.

   On a single-process communicator there is no off-diagonal part: B_oth is set to NULL, and so
   are the saved-buffer outputs that were provided.

    Level: developer

*/
PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
{
  VecScatter_MPI_General *gen_to,*gen_from;
  PetscErrorCode         ierr;
  Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ             *b_oth;
  VecScatter             ctx;
  MPI_Comm               comm;
  PetscMPIInt            *rprocs,*sprocs,tag,rank;
  PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
  PetscInt               *rvalues,*svalues;
  MatScalar              *b_otha,*bufa,*bufA;
  PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
  MPI_Request            *rwaits = NULL,*swaits = NULL;
  MPI_Status             *sstatus,rstatus;
  PetscMPIInt            jj,size;
  PetscInt               *cols,sbs,rbs;
  PetscScalar            *vals;
  PetscBool              savebufs; /* PETSC_TRUE when all three buffer outputs can be returned to the caller */

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    /* the arguments are PetscInt, which PETSc prints with %D */
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  if (size == 1) {
    /* nothing lives off-process: clear the outputs (not the local parameter copies) */
    if (startsj_s) *startsj_s = NULL;
    if (startsj_r) *startsj_r = NULL;
    if (bufa_ptr)  *bufa_ptr  = NULL;
    *B_oth = NULL;
    /* keep the log event balanced on the early return */
    ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if (!a->Mvctx_mpi1) { /* create a->Mvctx_mpi1 to be used for Mat-Mat ops */
    a->Mvctx_mpi1_flg = PETSC_TRUE;
    ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr);
  }
  ctx = a->Mvctx_mpi1;
  tag = ((PetscObject)ctx)->tag;

  gen_to   = (VecScatter_MPI_General*)ctx->todata;
  gen_from = (VecScatter_MPI_General*)ctx->fromdata;
  nrecvs   = gen_from->n;
  nsends   = gen_to->n;

  ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
  srow    = gen_to->indices;    /* local row index to be sent */
  sstarts = gen_to->starts;
  sprocs  = gen_to->procs;
  sstatus = gen_to->sstatus;
  sbs     = gen_to->bs;
  rstarts = gen_from->starts;
  rprocs  = gen_from->procs;
  rbs     = gen_from->bs;

  /* the reuse path reads all three saved buffers, so without them fall back to a fresh build */
  savebufs = (startsj_s && startsj_r && bufa_ptr) ? PETSC_TRUE : PETSC_FALSE;
  if (!savebufs) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX) {
    /* i-array */
    /*---------*/
    /*  post receives */
    ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
    for (i=0; i<nrecvs; i++) {
      rowlen = rvalues + rstarts[i]*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
      ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message */
    ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);

    sstartsj[0] = 0;
    rstartsj[0] = 0;
    len         = 0; /* total length of j or a array to be sent */
    k           = 0;
    ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
    for (i=0; i<nsends; i++) {
      rowlen = svalues + sstarts[i]*sbs;
      nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l=0; l<sbs; l++) {
          ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */

          rowlen[j*sbs+l] = ncols;

          len += ncols;
          ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
        }
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);

      sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
    ierr = PetscFree(svalues);CHKERRQ(ierr);

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);

    b_othi[0] = 0;
    len       = 0; /* total length of j or a array to be received */
    k         = 0;
    for (i=0; i<nrecvs; i++) {
      rowlen = rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
        k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }
    ierr = PetscFree(rvalues);CHKERRQ(ierr);

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /*  post receives of j-array */
    for (i=0; i<nrecvs; i++) {
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++) {
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank];  /* global row idx */
        for (ll=0; ll<sbs; ll++) {
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++) {
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX) {
    /* pick up the layout and send buffer saved by the initial call */
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");

  /* a-array */
  /*---------*/
  /*  post receives of a-array */
  for (i=0; i<nrecvs; i++) {
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++) {
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank];  /* global row idx */
      for (ll=0; ll<sbs; ll++) {
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++) {
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!savebufs) {
      /* the caller cannot take the buffers: release the ones allocated above
         (bufa itself, not the caller's bufa_ptr) */
      ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
    } else {
      *startsj_s = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
  MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

  Not Collective

  Input Parameters:
. A - The matrix in mpiaij format

  Output Parameter:
+ lvec - The local vector holding off-process values from the argument to a matrix-vector product
.
colmap - A map from global column index to local index into lvec 5375 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5376 5377 Level: developer 5378 5379 @*/ 5380 #if defined(PETSC_USE_CTABLE) 5381 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5382 #else 5383 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5384 #endif 5385 { 5386 Mat_MPIAIJ *a; 5387 5388 PetscFunctionBegin; 5389 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5390 PetscValidPointer(lvec, 2); 5391 PetscValidPointer(colmap, 3); 5392 PetscValidPointer(multScatter, 4); 5393 a = (Mat_MPIAIJ*) A->data; 5394 if (lvec) *lvec = a->lvec; 5395 if (colmap) *colmap = a->colmap; 5396 if (multScatter) *multScatter = a->Mvctx; 5397 PetscFunctionReturn(0); 5398 } 5399 5400 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5401 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5402 #if defined(PETSC_HAVE_MKL_SPARSE) 5403 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5404 #endif 5405 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5406 #if defined(PETSC_HAVE_ELEMENTAL) 5407 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5408 #endif 5409 #if defined(PETSC_HAVE_HYPRE) 5410 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5411 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*); 5412 #endif 5413 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*); 5414 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5415 5416 /* 5417 Computes (B'*A')' since computing B*A directly is untenable 5418 5419 n p p 5420 ( ) ( ) ( ) 5421 m ( A ) * n ( B ) = m ( C ) 5422 ( ) ( ) ( ) 5423 5424 */ 
5425 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5426 { 5427 PetscErrorCode ierr; 5428 Mat At,Bt,Ct; 5429 5430 PetscFunctionBegin; 5431 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5432 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5433 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5434 ierr = MatDestroy(&At);CHKERRQ(ierr); 5435 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5436 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5437 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5438 PetscFunctionReturn(0); 5439 } 5440 5441 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5442 { 5443 PetscErrorCode ierr; 5444 PetscInt m=A->rmap->n,n=B->cmap->n; 5445 Mat Cmat; 5446 5447 PetscFunctionBegin; 5448 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5449 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5450 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5451 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5452 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5453 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5454 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5455 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5456 5457 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5458 5459 *C = Cmat; 5460 PetscFunctionReturn(0); 5461 } 5462 5463 /* ----------------------------------------------------------------*/ 5464 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5465 { 5466 PetscErrorCode ierr; 5467 5468 PetscFunctionBegin; 5469 if (scall == MAT_INITIAL_MATRIX) { 5470 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5471 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5472 ierr = 
PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5473 } 5474 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5475 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5476 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5477 PetscFunctionReturn(0); 5478 } 5479 5480 /*MC 5481 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5482 5483 Options Database Keys: 5484 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5485 5486 Level: beginner 5487 5488 .seealso: MatCreateAIJ() 5489 M*/ 5490 5491 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5492 { 5493 Mat_MPIAIJ *b; 5494 PetscErrorCode ierr; 5495 PetscMPIInt size; 5496 5497 PetscFunctionBegin; 5498 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5499 5500 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5501 B->data = (void*)b; 5502 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5503 B->assembled = PETSC_FALSE; 5504 B->insertmode = NOT_SET_VALUES; 5505 b->size = size; 5506 5507 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5508 5509 /* build cache for off array entries formed */ 5510 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5511 5512 b->donotstash = PETSC_FALSE; 5513 b->colmap = 0; 5514 b->garray = 0; 5515 b->roworiented = PETSC_TRUE; 5516 5517 /* stuff used for matrix vector multiply */ 5518 b->lvec = NULL; 5519 b->Mvctx = NULL; 5520 5521 /* stuff for MatGetRow() */ 5522 b->rowindices = 0; 5523 b->rowvalues = 0; 5524 b->getrowactive = PETSC_FALSE; 5525 5526 /* flexible pointer used in CUSP/CUSPARSE classes */ 5527 b->spptr = NULL; 5528 5529 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5530 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5531 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5532 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5533 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5534 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 5535 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5536 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5537 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5538 #if defined(PETSC_HAVE_MKL_SPARSE) 5539 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 5540 #endif 5541 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5542 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5543 #if defined(PETSC_HAVE_ELEMENTAL) 5544 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5545 #endif 5546 #if defined(PETSC_HAVE_HYPRE) 5547 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 5548 #endif 5549 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr); 5550 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 5551 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5552 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5553 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5554 #if defined(PETSC_HAVE_HYPRE) 5555 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr); 5556 #endif 5557 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5558 PetscFunctionReturn(0); 5559 } 5560 5561 /*@C 5562 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5563 and "off-diagonal" part of the matrix in CSR format. 5564 5565 Collective on MPI_Comm 5566 5567 Input Parameters: 5568 + comm - MPI communicator 5569 . m - number of local rows (Cannot be PETSC_DECIDE) 5570 . n - This value should be the same as the local size used in creating the 5571 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5572 calculated if N is given) For square matrices n is almost always m. 5573 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5574 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5575 . i - row indices for "diagonal" portion of matrix 5576 . j - column indices 5577 . a - matrix values 5578 . oi - row indices for "off-diagonal" portion of matrix 5579 . oj - column indices 5580 - oa - matrix values 5581 5582 Output Parameter: 5583 . mat - the matrix 5584 5585 Level: advanced 5586 5587 Notes: 5588 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 
The user 5589 must free the arrays once the matrix has been destroyed and not before. 5590 5591 The i and j indices are 0 based 5592 5593 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5594 5595 This sets local rows and cannot be used to set off-processor values. 5596 5597 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5598 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5599 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5600 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5601 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5602 communication if it is known that only local entries will be set. 5603 5604 .keywords: matrix, aij, compressed row, sparse, parallel 5605 5606 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5607 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5608 @*/ 5609 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5610 { 5611 PetscErrorCode ierr; 5612 Mat_MPIAIJ *maij; 5613 5614 PetscFunctionBegin; 5615 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5616 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5617 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5618 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5619 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5620 ierr = 
MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5621 maij = (Mat_MPIAIJ*) (*mat)->data; 5622 5623 (*mat)->preallocated = PETSC_TRUE; 5624 5625 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5626 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5627 5628 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5629 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5630 5631 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5632 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5633 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5634 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5635 5636 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 5637 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5638 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5639 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 5640 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5641 PetscFunctionReturn(0); 5642 } 5643 5644 /* 5645 Special version for direct calls from Fortran 5646 */ 5647 #include <petsc/private/fortranimpl.h> 5648 5649 /* Change these macros so can be used in void function */ 5650 #undef CHKERRQ 5651 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5652 #undef SETERRQ2 5653 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5654 #undef SETERRQ3 5655 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5656 #undef SETERRQ 5657 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5658 5659 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5660 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5661 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5662 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5663 #else 5664 #endif 5665 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt 
im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5666 { 5667 Mat mat = *mmat; 5668 PetscInt m = *mm, n = *mn; 5669 InsertMode addv = *maddv; 5670 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5671 PetscScalar value; 5672 PetscErrorCode ierr; 5673 5674 MatCheckPreallocated(mat,1); 5675 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5676 5677 #if defined(PETSC_USE_DEBUG) 5678 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5679 #endif 5680 { 5681 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5682 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5683 PetscBool roworiented = aij->roworiented; 5684 5685 /* Some Variables required in the macro */ 5686 Mat A = aij->A; 5687 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5688 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5689 MatScalar *aa = a->a; 5690 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? 
PETSC_TRUE : PETSC_FALSE); 5691 Mat B = aij->B; 5692 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5693 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5694 MatScalar *ba = b->a; 5695 5696 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5697 PetscInt nonew = a->nonew; 5698 MatScalar *ap1,*ap2; 5699 5700 PetscFunctionBegin; 5701 for (i=0; i<m; i++) { 5702 if (im[i] < 0) continue; 5703 #if defined(PETSC_USE_DEBUG) 5704 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5705 #endif 5706 if (im[i] >= rstart && im[i] < rend) { 5707 row = im[i] - rstart; 5708 lastcol1 = -1; 5709 rp1 = aj + ai[row]; 5710 ap1 = aa + ai[row]; 5711 rmax1 = aimax[row]; 5712 nrow1 = ailen[row]; 5713 low1 = 0; 5714 high1 = nrow1; 5715 lastcol2 = -1; 5716 rp2 = bj + bi[row]; 5717 ap2 = ba + bi[row]; 5718 rmax2 = bimax[row]; 5719 nrow2 = bilen[row]; 5720 low2 = 0; 5721 high2 = nrow2; 5722 5723 for (j=0; j<n; j++) { 5724 if (roworiented) value = v[i*n+j]; 5725 else value = v[i+j*m]; 5726 if (in[j] >= cstart && in[j] < cend) { 5727 col = in[j] - cstart; 5728 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 5729 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 5730 } else if (in[j] < 0) continue; 5731 #if defined(PETSC_USE_DEBUG) 5732 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5733 #endif 5734 else { 5735 if (mat->was_assembled) { 5736 if (!aij->colmap) { 5737 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5738 } 5739 #if defined(PETSC_USE_CTABLE) 5740 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5741 col--; 5742 #else 5743 col = aij->colmap[in[j]] - 1; 5744 #endif 5745 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) 
continue; 5746 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5747 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5748 col = in[j]; 5749 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5750 B = aij->B; 5751 b = (Mat_SeqAIJ*)B->data; 5752 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5753 rp2 = bj + bi[row]; 5754 ap2 = ba + bi[row]; 5755 rmax2 = bimax[row]; 5756 nrow2 = bilen[row]; 5757 low2 = 0; 5758 high2 = nrow2; 5759 bm = aij->B->rmap->n; 5760 ba = b->a; 5761 } 5762 } else col = in[j]; 5763 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 5764 } 5765 } 5766 } else if (!aij->donotstash) { 5767 if (roworiented) { 5768 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5769 } else { 5770 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5771 } 5772 } 5773 } 5774 } 5775 PetscFunctionReturnVoid(); 5776 } 5777 5778