#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, and MATAIJCRL;
   the type also automatically switches over to use inodes when enough of them exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
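
/*
   A minimal usage sketch for MATAIJ (illustrative only; comm, N, d_nz, and o_nz are
   hypothetical caller-supplied values, not variables defined in this file).  Calling both
   preallocation routines, as recommended above, keeps the same code correct on one
   process and on many:

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,d_nz,NULL);CHKERRQ(ierr);            (used on 1 process)
     ierr = MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);  (used on >1 processes)
*/
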
PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt = 0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*) Y->data;

  PetscFunctionBegin;
  if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  IS             sis,gis;
  PetscErrorCode ierr;
  const PetscInt *isis,*igis;
  PetscInt       n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal nonzeros in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal nonzeros in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else { /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
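
/*
   Sketch of the intended calling pattern for MatDistribute_MPIAIJ() (mloc, the number of
   locally owned rows, is a hypothetical caller-supplied value).  The first call builds the
   layout and copies both structure and values; a later call with MAT_REUSE_MATRIX moves
   only the new numerical values from process 0 into the existing parallel matrix:

     Mat dmat = NULL;
     ierr = MatDistribute_MPIAIJ(comm,gmat,mloc,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
     ...                                          (gmat gets new values on process 0)
     ierr = MatDistribute_MPIAIJ(comm,gmat,mloc,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);
*/
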
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each process
  stores an order-N integer array) but is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
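
/*
   Note on the convention above: entries are stored shifted by one so that 0 can mean
   "global column not present in the off-diagonal part".  A lookup therefore undoes the
   shift, as MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below do:

   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;                      (lcol is now the local column, or negative if absent)
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif

   (gcol/lcol are illustrative names, not variables used in this file.)
*/
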
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
{ \
    if (col <= lastcol1)  low1 = 0; \
    else                 high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
{ \
    if (col <= lastcol2)  low2 = 0; \
    else                 high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
b_noinsert: ; \
    bilen[row] = nrow2; \
}
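
/*
   Both macros above implement the same insertion strategy for one (row,col,value) triple
   in a sorted CSR row: a short binary search narrows [low,high) until at most five entries
   remain, a linear scan then finds the column (updating in place for ADD_VALUES or
   INSERT_VALUES), and only if the column is absent is a new nonzero spliced in, shifting
   the tail of the row and bumping the matrix nonzero state.  nonew == 1 silently drops new
   nonzeros, while nonew == -1 makes inserting one an error.
*/
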
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A                 = aij->A;
  Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa               = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B                 = aij->B;
  Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba               = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
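
/*
   Off-process entries take a different path: MatSetValues_MPIAIJ() above only stashes
   them locally, and the communication happens later during assembly.
   MatAssemblyBegin_MPIAIJ() below starts the stash scatter, and MatAssemblyEnd_MPIAIJ()
   receives each message and replays it through MatSetValues_MPIAIJ() on the owning
   process, one row segment at a time.
*/
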
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
  /* determine if any process has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no process disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ *) A->data;
  PetscInt       *lrows;
  PetscInt       r, len;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }
  /* Must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */
    PetscBool cong;
    ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
    if (cong) A->congruentlayouts = 1;
    else      A->congruentlayouts = 0;
  }
  if ((diag != 0.0) && A->congruentlayouts) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off-diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off-diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of off-process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);

  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
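
/*
   The pattern in MatMult_MPIAIJ() above is the standard MPIAIJ split
   y = A_diag*x_local + B_offdiag*x_ghost: the scatter that fills the ghost vector
   lvec is started first, the multiply with the diagonal block proceeds while the
   messages are in flight, and only then is the scatter completed and the off-diagonal
   contribution added.  MatMultAdd_MPIAIJ() below overlaps communication and computation
   in the same way.
*/
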
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the neediest processor */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
  PetscFunctionReturn(0);
}
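
/*
   MatView_MPIAIJ_Binary() above writes the matrix in three passes (row lengths, column
   indices, values), each using the same flow-controlled master/worker pattern: process 0
   writes its own data, then under PetscViewerFlowControl* pacing receives each other
   process's buffer in rank order and writes it, while every other process sends exactly
   one length message followed by one data message.  The flow control keeps process 0 from
   being flooded by messages from a large number of senders at once.
*/
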
#include <petscdraw.h>
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
      ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
      ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      for (i=0; i<(PetscInt)size; i++) {
        nmax  = PetscMax(nmax,nz[i]);
        nmin  = PetscMin(nmin,nz[i]);
        navg += nz[i];
      }
      ierr = PetscFree(nz);CHKERRQ(ierr);
      navg = navg/size;
      ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);

PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;
  PetscBool      hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(&xx1);CHKERRQ(ierr);
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");

  ierr = VecDestroy(&bb1);CHKERRQ(ierr);

  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}
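
/*
   A minimal sketch of how the local sweeps above are commonly driven (assumes
   a standard KSP named ksp in scope; not part of this file's build).  Each
   outer iteration scatters the off-process entries of x into mat->lvec, forms
   bb1 = bb - B*x, and runs the requested sweep on the local diagonal block A:

     PC pc;
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
     ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);CHKERRQ(ierr);

   or, equivalently, -pc_type sor -pc_sor_local_symmetric on the command line.
*/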

PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            aA,aB,Aperm;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = NULL;
  PetscBool      done;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);

  /* Invert row permutation to find out where my rows should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
  ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);

  /* Invert column permutation to find out where my columns should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
  ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);

  /* Find out where my gcols should go */
  ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
  ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt row = rdest[i],rowner;
    ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
    for (j=ai[i]; j<ai[i+1]; j++) {
      PetscInt cowner,col = cdest[aj[j]];
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j=bi[i]; j<bi[i+1]; j++) {
      PetscInt cowner,col = gcdest[bj[j]];
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);

  ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt j0,rowlen;
    rowlen = ai[i+1] - ai[i];
    for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
    rowlen = bi[i+1] - bi[i];
    for (j0=j=0; j<rowlen; j0=j) {
      for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
  ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
  ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
  ierr = PetscFree(gcdest);CHKERRQ(ierr);
  if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
  *B = Aperm;
  PetscFunctionReturn(0);
}
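
/*
   A minimal usage sketch for the routine above (not part of this file's
   build; assumes a square, assembled Mat A whose row and column layouts
   match, and an ierr in scope).  The identity stride ISs are placeholders
   for a real permutation:

     IS       rowp,colp;
     Mat      Aperm;
     PetscInt rstart,rend;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&rowp);CHKERRQ(ierr);
     ierr = ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&colp);CHKERRQ(ierr);
     ierr = MatPermute(A,rowp,colp,&Aperm);CHKERRQ(ierr);
     ierr = ISDestroy(&rowp);CHKERRQ(ierr);
     ierr = ISDestroy(&colp);CHKERRQ(ierr);
     ierr = MatDestroy(&Aperm);CHKERRQ(ierr);
*/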

PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
  if (ghosts) *ghosts = aij->garray;
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}
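
/*
   A minimal usage sketch (not part of this file's build; assumes an assembled
   Mat A and an ierr in scope): MAT_GLOBAL_SUM above reduces the five
   per-process counters with MPI_SUM, MAT_GLOBAL_MAX with MPI_MAX.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g  nz allocated %g  mallocs %g\n",
                        info.nz_used,info.nz_allocated,info.mallocs);CHKERRQ(ierr);
*/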

PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SPD:
    A->spd_set = PETSC_TRUE;
    A->spd     = flg;
    if (flg) {
      A->symmetric                  = PETSC_TRUE;
      A->structurally_symmetric     = PETSC_TRUE;
      A->symmetric_set              = PETSC_TRUE;
      A->structurally_symmetric_set = PETSC_TRUE;
    }
    break;
  case MAT_SYMMETRIC:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_HERMITIAN:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
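
/*
   A minimal usage sketch (not part of this file's build; assumes an ierr in
   scope).  Options that change local storage are forwarded to both the
   diagonal (A) and off-diagonal (B) sequential blocks above; symmetry flags
   only need to reach the diagonal block.

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); // skip the stash
     ierr = MatSetOption(A,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);                     // also marks symmetric
*/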

PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}
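
/*
   A minimal usage sketch (not part of this file's build; assumes an assembled
   Mat A and an ierr in scope): only locally owned rows may be requested, and
   each MatGetRow() must be paired with MatRestoreRow() before the next row.

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       // ... inspect global column indices cols[] and values vals[] ...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/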

PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = PetscSqrtReal(*norm);
      ierr  = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
      *norm = 0.0;
      v     = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}
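
/*
   A minimal usage sketch (not part of this file's build; assumes an assembled
   Mat A and an ierr in scope).  NORM_1 and NORM_FROBENIUS reduce with MPI_SUM,
   NORM_INFINITY with MPI_MAX; NORM_2 is rejected above.

     PetscReal nrm;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
*/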

PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ     *a    = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data,*Bloc = (Mat_SeqAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
  PetscInt       cstart = A->cmap->rstart,ncol;
  Mat            B;
  MatScalar      *array;

  PetscFunctionBegin;
  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt             *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode          *oloc;
    PETSC_UNUSED PetscSF sf;

    ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
    /* compute d_nnz for preallocation */
    ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) {
      d_nnz[aj[i]]++;
      aj[i] += cstart; /* global col index to be used by MatSetValues() */
    }
    /* compute local off-diagonal contributions */
    ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
    ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
  } else {
    B    = *matout;
    ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
  }

  /* copy over the A part */
  array = Aloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<ma; i++) {
    ncol = ai[i+1]-ai[i];
    ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    array += ncol; aj += ncol;
  }
  aj = Aloc->j;
  for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col indices */

  /* copy over the B part */
  ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
  array = Bloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    array += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
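
/*
   A minimal usage sketch (not part of this file's build; assumes an assembled
   Mat A and an ierr in scope): MAT_INITIAL_MATRIX takes the preallocation path
   above and returns a new matrix; in-place transposition ends in the
   MatHeaderMerge() branch.

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatDestroy(&At);CHKERRQ(ierr);
*/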

PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
       and then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Computes the number of nonzeros per row needed for preallocation when X and Y
   have different nonzero structures.
*/
PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
{
  PetscInt i,j,k,nzx,nzy;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix */
  for (i=0; i<m; i++) {
    const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
    nzx    = xi[i+1] - xi[i];
    nzy    = yi[i+1] - yi[i];
    nnz[i] = 0;
    for (j=0,k=0; j<nzx; j++) {                                   /* Point in X */
      for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
      if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
      nnz[i]++;
    }
    for (; k<nzy; k++) nnz[i]++;
  }
  PetscFunctionReturn(0);
}

/* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
{
  PetscErrorCode ierr;
  PetscInt       m = Y->rmap->N;
  Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
  Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;

  PetscFunctionBegin;
  ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x    = (Mat_SeqAIJ*)xx->A->data;
    ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
    y    = (Mat_SeqAIJ*)yy->A->data;
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    x    = (Mat_SeqAIJ*)xx->B->data;
    y    = (Mat_SeqAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzeros of X are a subset of Y's */
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o;
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
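
/*
   A minimal usage sketch (not part of this file's build; assumes assembled
   Mats X and Y with the same sizes and an ierr in scope): Y <- a*X + Y.  With
   SAME_NONZERO_PATTERN the two BLAS axpy calls above are used; with
   DIFFERENT_NONZERO_PATTERN a new matrix is preallocated from the merged row
   patterns computed by the helpers above.

     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/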

extern PetscErrorCode MatConjugate_SeqAIJ(Mat);

PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
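
/*
   A minimal usage sketch (not part of this file's build; assumes an assembled
   Mat A and an ierr in scope): v must share A's row layout, and the optional
   idx array receives the global column of the winning entry in each row.

     Vec      v;
     PetscInt m,*idx;
     ierr = MatCreateVecs(A,NULL,&v);CHKERRQ(ierr);    // left vector, matches rows
     ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
     ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(A,v,idx);CHKERRQ(ierr);
     ierr = PetscFree(idx);CHKERRQ(ierr);
     ierr = VecDestroy(&v);CHKERRQ(ierr);
*/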

PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
  PetscInt       n      = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap  = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
  PetscInt       n      = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap  = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            *dummy;

  PetscFunctionBegin;
  ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
  *newmat = *dummy;
  ierr    = PetscFree(dummy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
  A->factorerrortype = a->A->factorerrortype;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;

  PetscFunctionBegin;
  ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
  ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
{
  PetscFunctionBegin;
  if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
  else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
  PetscFunctionReturn(0);
}

/*@
   MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap

   Collective on Mat

   Input Parameters:
+  A - the matrix
-  sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)

   Level: advanced

@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
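
/*
   A minimal usage sketch (not part of this file's build; assumes an assembled
   Mat A, an ierr in scope, and hypothetical index sets is[] of length nis):
   switch the overlap algorithm before MatIncreaseOverlap() is invoked, for
   example during PCASM setup.

     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatIncreaseOverlap(A,nis,is,1);CHKERRQ(ierr);

   Command-line equivalent: -mat_increase_overlap_scalable
*/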

PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
{
  PetscErrorCode ierr;
  PetscBool      sc = PETSC_FALSE,flg;

  PetscFunctionBegin;
  ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
  }
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
  Mat_SeqAIJ     *aij  = (Mat_SeqAIJ*)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
  } else if (!aij->nz) {
    PetscInt nonew = aij->nonew;
    ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
    aij->nonew = nonew;
  }
  ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
  ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
  if (d) {
    PetscInt rstart;
    ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
    *d  += rstart;
  }
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                /*10*/ 0,
                                       0,
                                       0,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*29*/ MatSetUp_MPIAIJ,
                                       0,
                                       0,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       0,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetRandom_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       0,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       0,
                                /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       0,
                                       MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
                                /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       0,
                                       0,
                                       MatFindZeroDiagonals_MPIAIJ,
                                /*80*/ 0,
                                       0,
                                       0,
                                /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
                                       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       MatPtAP_MPIAIJ_MPIAIJ,
                                       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
                                /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*99*/ 0,
                                       0,
                                       0,
                                       MatConjugate_MPIAIJ,
                                       0,
                               /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       0,
                                       0,
                               /*109*/ 0,
                                       0,
                                       MatGetRowMin_MPIAIJ,
                                       0,
                                       MatMissingDiagonal_MPIAIJ,
                               /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       0,
                                       MatGetGhosts_MPIAIJ,
                                       0,
                                       0,
                               /*119*/ 0,
                                       0,
                                       0,
                                       0,
                                       MatGetMultiProcBlock_MPIAIJ,
                               /*124*/ MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnNorms_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       0,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                               /*129*/ 0,
                                       MatTransposeMatMult_MPIAIJ_MPIAIJ,
                                       MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       0,
                               /*134*/ 0,
                                       0,
                                       MatRARt_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                               /*139*/ MatSetBlockSizes_MPIAIJ,
                                       0,
                                       0,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                               /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIAIJ
};

/* ----------------------------------------------------------------------------------------*/

PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  b    = (Mat_MPIAIJ*)B->data;

#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(b->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(b->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);

  /* Because b->B may have been resized we simply destroy it and create a new one each time */
  ierr = MatDestroy(&b->B);CHKERRQ(ierr);
  ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
  ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);

  if (!B->preallocated) {
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
  }

  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}
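
/*
   A minimal usage sketch (not part of this file's build; assumes a local row
   count m and an ierr in scope): d_nz/o_nz give a uniform per-row estimate for
   the diagonal and off-diagonal blocks created above, while the optional
   d_nnz/o_nnz arrays give exact per-row counts and override them.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr); // <=5 diag, <=2 off-diag nz per row
*/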

PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  b    = (Mat_MPIAIJ*)B->data;

#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(b->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(b->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);

  ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
  ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a       = (Mat_MPIAIJ*)mat->data;

  mat->factortype   = matin->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  if (oldmat->Mvctx_mpi1) {
    ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
  }

  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
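
/*
   A minimal usage sketch (not part of this file's build; assumes an assembled
   Mat A and an ierr in scope): cpvalues selects whether the numerical values
   of both sequential blocks are copied.

     Mat C;
     ierr = MatDuplicate(A,MAT_COPY_VALUES,&C);CHKERRQ(ierr); // or MAT_DO_NOT_COPY_VALUES
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/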

PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscScalar    *vals,*svals;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
  PetscInt       cend,cstart,n,*rowners;
  int            fd;
  PetscInt       bs = newMat->rmap->bs;

  PetscFunctionBegin;
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
  }

  ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  if (bs < 0) bs = 1;

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M    = header[1]; N = header[2];

  /* If global sizes are set, check that they are consistent with those given in the file */
  if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
  if (newMat->cmap->N >= 0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);

  /* determine ownership of all (block) rows */
  if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows (%d) and block size (%d)",M,bs);
  if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */
  else m = newMat->rmap->n; /* Set by user */

  ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* First process needs enough room for process with most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<=size; i++) {
      mmax = PetscMax(mmax,rowners[i]);
    }
  } else mmax = -1; /* unused, but compilers complain */

  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
    ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += ourlens[j];
    }
    for (i=1; i<size; i++) {
      ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<rowners[i+1]-rowners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's parts and ship them off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
    else n = newMat->cmap->n;
    ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);

  if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}

  ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);

  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
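
/*
   A minimal usage sketch (not part of this file's build; assumes an ierr in
   scope): MatLoad() reaches the routine above through a binary viewer.  The
   file name "matrix.dat" is a hypothetical file written earlier with MatView()
   on a binary viewer.

     Mat         A;
     PetscViewer fd;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&fd);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,fd);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&fd);CHKERRQ(ierr);
*/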

/* Not scalable because of ISAllGather() unless getting all columns. */
PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscBool      isstride;
  PetscMPIInt    lisstride = 0,gisstride;

  PetscFunctionBegin;
  /* check if we are grabbing all columns */
  ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);

  if (isstride) {
    PetscInt start,len,mstart,mlen;
    ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
    ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
    ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
    if (mstart == start && mlen-mstart == len) lisstride = 1;
  }

  ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (gisstride) {
    PetscInt N;
    ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
    ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
    ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
  } else {
    PetscInt cbs;
    ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
    ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
  }

  *isseq = iscol_local;
  PetscFunctionReturn(0);
}
*/ 3005 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3006 { 3007 PetscErrorCode ierr; 3008 IS iscol_local; 3009 PetscBool isstride; 3010 PetscMPIInt lisstride=0,gisstride; 3011 3012 PetscFunctionBegin; 3013 /* check if we are grabbing all columns*/ 3014 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3015 3016 if (isstride) { 3017 PetscInt start,len,mstart,mlen; 3018 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3019 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3020 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3021 if (mstart == start && mlen-mstart == len) lisstride = 1; 3022 } 3023 3024 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3025 if (gisstride) { 3026 PetscInt N; 3027 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3028 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3029 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3030 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3031 } else { 3032 PetscInt cbs; 3033 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3034 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3035 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3036 } 3037 3038 *isseq = iscol_local; 3039 PetscFunctionReturn(0); 3040 } 3041 3042 /* 3043 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3044 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3045 3046 Input Parameters: 3047 mat - matrix 3048 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3049 i.e., mat->rstart <= isrow[i] < mat->rend 3050 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3051 i.e., mat->cstart <= iscol[i] < mat->cend 3052 Output Parameter: 3053 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3054 iscol_o - sequential column index set for retrieving mat->B 3055 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3056 */ 3057 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3058 { 3059 PetscErrorCode ierr; 3060 Vec x,cmap; 3061 const PetscInt *is_idx; 3062 PetscScalar *xarray,*cmaparray; 3063 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3064 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3065 Mat B=a->B; 3066 Vec lvec=a->lvec,lcmap; 3067 PetscInt i,cstart,cend,Bn=B->cmap->N; 3068 MPI_Comm comm; 3069 VecScatter Mvctx=a->Mvctx; 3070 3071 PetscFunctionBegin; 3072 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3073 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3074 3075 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3076 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3077 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3078 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3079 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3080 3081 /* Get start indices */ 3082 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3083 isstart -= ncols; 3084 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3085 3086 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3087 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3088 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3089 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3090 for (i=0; i<ncols; i++) { 3091 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3092 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3093 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3094 } 3095 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3096 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3097 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3098 3099 /* Get iscol_d */ 3100 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3101 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3102 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3103 3104 /* Get isrow_d */ 3105 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3106 rstart = mat->rmap->rstart; 3107 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3108 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3109 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3110 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3111 3112 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3113 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3114 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3115 3116 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3117 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3118 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3119 3120 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3121 3122 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3123 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3124 3125 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3126 /* off-process column indices */ 3127 count = 0; 3128 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3129 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3130 3131 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3132 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3133 for (i=0; i<Bn; i++) { 3134 if (PetscRealPart(xarray[i]) > -1.0) { 3135 idx[count] = i; /* local column index in off-diagonal part B */ 3136 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3137 count++; 3138 } 3139 } 3140 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3141 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3142 3143 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3144 /* cannot ensure iscol_o has same blocksize as iscol! 
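      (iscol_o holds only the off-process subset of iscol, whose length need
      not be divisible by iscol's block size.)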
*/ 3145 3146 ierr = PetscFree(idx);CHKERRQ(ierr); 3147 *garray = cmap1; 3148 3149 ierr = VecDestroy(&x);CHKERRQ(ierr); 3150 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3151 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3152 PetscFunctionReturn(0); 3153 } 3154 3155 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3156 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3157 { 3158 PetscErrorCode ierr; 3159 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3160 Mat M = NULL; 3161 MPI_Comm comm; 3162 IS iscol_d,isrow_d,iscol_o; 3163 Mat Asub = NULL,Bsub = NULL; 3164 PetscInt n; 3165 3166 PetscFunctionBegin; 3167 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3168 3169 if (call == MAT_REUSE_MATRIX) { 3170 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3171 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3172 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3173 3174 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3175 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3176 3177 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3178 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3179 3180 /* Update diagonal and off-diagonal portions of submat */ 3181 asub = (Mat_MPIAIJ*)(*submat)->data; 3182 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3183 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3184 if (n) { 3185 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3186 } 3187 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3188 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3189 3190 } else { /* call == MAT_INITIAL_MATRIX) */ 3191 const PetscInt *garray; 3192 PetscInt BsubN; 3193 3194 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3195 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3196 3197 /* Create local submatrices Asub and Bsub */ 3198 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3199 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3200 3201 /* Create submatrix M */ 3202 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3203 3204 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3205 asub = (Mat_MPIAIJ*)M->data; 3206 3207 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3208 n = asub->B->cmap->N; 3209 if (BsubN > n) { 3210 /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */ 3211 const PetscInt *idx; 3212 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3213 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3214 3215 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3216 j = 0; 3217 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3218 for (i=0; i<n; i++) { 3219 if (j >= BsubN) break; 3220 while (subgarray[i] > garray[j]) j++; 3221 3222 if (subgarray[i] == garray[j]) { 3223 idx_new[i] = idx[j++]; 3224 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3225 } 3226 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3227 3228 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3229 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3230 3231 } else if (BsubN < n) { 3232 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3233 } 3234 3235 ierr = PetscFree(garray);CHKERRQ(ierr); 3236 *submat = M; 3237 3238 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3239 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3240 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3241 3242 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3243 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3244 3245 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3246 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3247 } 3248 PetscFunctionReturn(0); 3249 } 3250 3251 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3252 { 3253 PetscErrorCode ierr; 3254 IS iscol_local=NULL,isrow_d; 3255 PetscInt csize; 3256 PetscInt n,i,j,start,end; 3257 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3258 MPI_Comm comm; 3259 3260 PetscFunctionBegin; 3261 /* If isrow has same processor distribution as mat, 3262 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3263 if (call == MAT_REUSE_MATRIX) { 3264 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3265 if (isrow_d) { 3266 sameRowDist = PETSC_TRUE; 3267 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3268 } else { 3269 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3270 if (iscol_local) { 3271 sameRowDist = PETSC_TRUE; 3272 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3273 } 3274 } 3275 } else { 3276 /* Check if isrow has same processor distribution as mat */ 3277 sameDist[0] 
= PETSC_FALSE; 3278 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3279 if (!n) { 3280 sameDist[0] = PETSC_TRUE; 3281 } else { 3282 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3283 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3284 if (i >= start && j < end) { 3285 sameDist[0] = PETSC_TRUE; 3286 } 3287 } 3288 3289 /* Check if iscol has same processor distribution as mat */ 3290 sameDist[1] = PETSC_FALSE; 3291 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3292 if (!n) { 3293 sameDist[1] = PETSC_TRUE; 3294 } else { 3295 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3296 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3297 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3298 } 3299 3300 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3301 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3302 sameRowDist = tsameDist[0]; 3303 } 3304 3305 if (sameRowDist) { 3306 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3307 /* isrow and iscol have same processor distribution as mat */ 3308 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3309 PetscFunctionReturn(0); 3310 } else { /* sameRowDist */ 3311 /* isrow has same processor distribution as mat */ 3312 if (call == MAT_INITIAL_MATRIX) { 3313 PetscBool sorted; 3314 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3315 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3316 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3317 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3318 3319 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3320 if (sorted) { 3321 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3322 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3323 PetscFunctionReturn(0); 3324 } 3325 } else { /* call == MAT_REUSE_MATRIX */ 3326 IS iscol_sub; 3327 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3328 if (iscol_sub) { 3329 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3330 PetscFunctionReturn(0); 3331 } 3332 } 3333 } 3334 } 3335 3336 /* General case: iscol -> iscol_local which has global size of iscol */ 3337 if (call == MAT_REUSE_MATRIX) { 3338 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3339 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3340 } else { 3341 if (!iscol_local) { 3342 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3343 } 3344 } 3345 3346 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3347 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3348 3349 if (call == MAT_INITIAL_MATRIX) { 3350 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3351 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3352 } 3353 PetscFunctionReturn(0); 3354 } 3355 3356 /*@C 3357 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3358 and "off-diagonal" part of the matrix in CSR format. 3359 3360 Collective on MPI_Comm 3361 3362 Input Parameters: 3363 + comm - MPI communicator 3364 . 
A - "diagonal" portion of matrix 3365 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3366 - garray - global index of B columns 3367 3368 Output Parameter: 3369 . mat - the matrix, with input A as its local diagonal matrix 3370 Level: advanced 3371 3372 Notes: 3373 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3374 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3375 3376 .seealso: MatCreateMPIAIJWithSplitArrays() 3377 @*/ 3378 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3379 { 3380 PetscErrorCode ierr; 3381 Mat_MPIAIJ *maij; 3382 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3383 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3384 PetscScalar *oa=b->a; 3385 Mat Bnew; 3386 PetscInt m,n,N; 3387 3388 PetscFunctionBegin; 3389 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3390 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3391 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3392 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3393 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3394 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3395 3396 /* Get global columns of mat */ 3397 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3398 3399 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3400 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3401 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3402 maij = (Mat_MPIAIJ*)(*mat)->data; 3403 3404 (*mat)->preallocated = PETSC_TRUE; 3405 3406 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3407 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3408 3409 /* Set A as diagonal portion of *mat */ 3410 maij->A = A; 3411 3412 nz = oi[m]; 3413 for (i=0; i<nz; i++) { 3414 col = oj[i]; 3415 oj[i] = garray[col]; 3416 } 3417 3418 /* Set Bnew as off-diagonal portion of *mat */ 3419 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3420 bnew = (Mat_SeqAIJ*)Bnew->data; 3421 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3422 maij->B = Bnew; 3423 3424 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3425 3426 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3427 b->free_a = PETSC_FALSE; 3428 b->free_ij = PETSC_FALSE; 3429 ierr = MatDestroy(&B);CHKERRQ(ierr); 3430 3431 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3432 bnew->free_a = PETSC_TRUE; 3433 bnew->free_ij = PETSC_TRUE; 3434 3435 /* condense columns of maij->B */ 3436 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3437 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3438 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3439 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3440 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3441 PetscFunctionReturn(0); 3442 } 3443 3444 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3445 
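/*
   A minimal calling sketch (illustrative only, not part of the public API): the
   SameRowDist path below is normally reached through MatCreateSubMatrix() when
   every index in isrow lies in the calling process's row ownership range, e.g.
   selecting all locally owned rows and columns of an assembled MATMPIAIJ matrix A:

     Mat      A,S;
     IS       isrow,iscol;
     PetscInt rstart,rend,cstart,cend;

     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = MatGetOwnershipRangeColumn(A,&cstart,&cend);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,cend-cstart,cstart,1,&iscol);CHKERRQ(ierr);
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
*/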
3446 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3447 { 3448 PetscErrorCode ierr; 3449 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3450 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3451 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3452 Mat M,Msub,B=a->B; 3453 MatScalar *aa; 3454 Mat_SeqAIJ *aij; 3455 PetscInt *garray = a->garray,*colsub,Ncols; 3456 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3457 IS iscol_sub,iscmap; 3458 const PetscInt *is_idx,*cmap; 3459 PetscBool allcolumns=PETSC_FALSE; 3460 MPI_Comm comm; 3461 3462 PetscFunctionBegin; 3463 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3464 3465 if (call == MAT_REUSE_MATRIX) { 3466 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3467 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3468 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3469 3470 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3471 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3472 3473 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3474 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3475 3476 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3477 3478 } else { /* call == MAT_INITIAL_MATRIX) */ 3479 PetscBool flg; 3480 3481 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3482 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3483 3484 /* (1) iscol -> nonscalable iscol_local */ 3485 /* Check for special case: each processor gets entire matrix columns */ 3486 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3487 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3488 if (allcolumns) { 3489 iscol_sub = iscol_local; 3490 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3491 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3492 3493 } else { 3494 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3495 PetscInt *idx,*cmap1,k; 3496 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3497 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3498 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3499 count = 0; 3500 k = 0; 3501 for (i=0; i<Ncols; i++) { 3502 j = is_idx[i]; 3503 if (j >= cstart && j < cend) { 3504 /* diagonal part of mat */ 3505 idx[count] = j; 3506 cmap1[count++] = i; /* column index in submat */ 3507 } else if (Bn) { 3508 /* off-diagonal part of mat */ 3509 if (j == garray[k]) { 3510 idx[count] = j; 3511 cmap1[count++] = i; /* column index in submat */ 3512 } else if (j > garray[k]) { 3513 while (j > garray[k] && k < Bn-1) k++; 3514 if (j == garray[k]) { 3515 idx[count] = j; 3516 cmap1[count++] = i; /* column index in submat */ 3517 } 3518 } 3519 } 3520 } 3521 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3522 3523 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3524 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3525 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3526 3527 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3528 } 3529 3530 /* (3) Create sequential Msub */ 3531 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3532 } 3533 3534 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3535 aij = (Mat_SeqAIJ*)(Msub)->data; 3536 ii = aij->i; 3537 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3538 3539 /* 3540 m - number of local rows 3541 Ncols - number of columns (same on all processors) 3542 rstart - first row in new global matrix generated 3543 */ 3544 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3545 3546 if (call == MAT_INITIAL_MATRIX) { 3547 /* (4) Create parallel newmat */ 3548 PetscMPIInt rank,size; 3549 PetscInt csize; 3550 3551 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3552 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3553 3554 /* 3555 Determine the number of non-zeros in the diagonal and off-diagonal 3556 portions of the matrix in order to do correct preallocation 3557 */ 3558 3559 /* first get start and end of "diagonal" columns */ 3560 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3561 if (csize == PETSC_DECIDE) { 3562 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3563 if (mglobal == Ncols) { /* square matrix */ 3564 nlocal = m; 3565 } else { 3566 nlocal = Ncols/size + ((Ncols % size) > rank); 3567 } 3568 } else { 3569 nlocal = csize; 3570 } 3571 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3572 rstart = rend - nlocal; 3573 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3574 3575 /* next, compute all the lengths */ 3576 jj = aij->j; 3577 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3578 olens = dlens + m; 3579 for (i=0; i<m; i++) { 3580 jend = ii[i+1] - ii[i]; 3581 olen = 0; 3582 dlen = 0; 3583 for (j=0; j<jend; j++) { 3584 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3585 else dlen++; 3586 jj++; 3587 } 3588 olens[i] = olen; 3589 dlens[i] = dlen; 3590 } 3591 3592 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3593 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3594 3595 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3596 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
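    /* dlens/olens computed above are exact per-row nonzero counts for the diagonal
       and off-diagonal blocks of M; the type must be set before
       MatMPIAIJSetPreallocation() below so the preallocation reaches the MPIAIJ
       implementation */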
3597 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3598 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3599 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3600 ierr = PetscFree(dlens);CHKERRQ(ierr); 3601 3602 } else { /* call == MAT_REUSE_MATRIX */ 3603 M = *newmat; 3604 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3605 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3606 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3607 /* 3608 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3609 rather than the slower MatSetValues(). 3610 */ 3611 M->was_assembled = PETSC_TRUE; 3612 M->assembled = PETSC_FALSE; 3613 } 3614 3615 /* (5) Set values of Msub to *newmat */ 3616 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3617 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3618 3619 jj = aij->j; 3620 aa = aij->a; 3621 for (i=0; i<m; i++) { 3622 row = rstart + i; 3623 nz = ii[i+1] - ii[i]; 3624 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3625 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3626 jj += nz; aa += nz; 3627 } 3628 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3629 3630 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3631 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3632 3633 ierr = PetscFree(colsub);CHKERRQ(ierr); 3634 3635 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3636 if (call == MAT_INITIAL_MATRIX) { 3637 *newmat = M; 3638 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3639 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3640 3641 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3642 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3643 3644 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3645 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3646 3647 if (iscol_local) { 3648 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3649 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3650 } 3651 } 3652 PetscFunctionReturn(0); 3653 } 3654 3655 /* 3656 Not great since it makes two copies of the submatrix, first an SeqAIJ 3657 in local and then by concatenating the local matrices the end result. 3658 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3659 3660 Note: This requires a sequential iscol with all indices. 
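  That is, each process first extracts its piece into a sequential AIJ matrix,
  and those pieces are then concatenated into the parallel result, so the
  submatrix is transiently stored twice.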
3661 */ 3662 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3663 { 3664 PetscErrorCode ierr; 3665 PetscMPIInt rank,size; 3666 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3667 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3668 Mat M,Mreuse; 3669 MatScalar *aa,*vwork; 3670 MPI_Comm comm; 3671 Mat_SeqAIJ *aij; 3672 PetscBool colflag,allcolumns=PETSC_FALSE; 3673 3674 PetscFunctionBegin; 3675 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3676 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3677 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3678 3679 /* Check for special case: each processor gets entire matrix columns */ 3680 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3681 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3682 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3683 3684 if (call == MAT_REUSE_MATRIX) { 3685 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3686 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3687 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3688 } else { 3689 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3690 } 3691 3692 /* 3693 m - number of local rows 3694 n - number of columns (same on all processors) 3695 rstart - first row in new global matrix generated 3696 */ 3697 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3698 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3699 if (call == MAT_INITIAL_MATRIX) { 3700 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3701 ii = aij->i; 3702 jj = aij->j; 3703 3704 /* 3705 Determine the number of non-zeros in the diagonal and off-diagonal 3706 portions of the matrix in order to do correct preallocation 3707 */ 3708 3709 /* first get start and end of "diagonal" columns */ 3710 if (csize == PETSC_DECIDE) { 3711 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3712 if (mglobal == n) { /* square matrix */ 3713 nlocal = m; 3714 } else { 3715 nlocal = n/size + ((n % size) > rank); 3716 } 3717 } else { 3718 nlocal = csize; 3719 } 3720 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3721 rstart = rend - nlocal; 3722 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3723 3724 /* next, compute all the lengths */ 3725 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3726 olens = dlens + m; 3727 for (i=0; i<m; i++) { 3728 jend = ii[i+1] - ii[i]; 3729 olen = 0; 3730 dlen = 0; 3731 for (j=0; j<jend; j++) { 3732 if (*jj < rstart || *jj >= rend) olen++; 3733 else dlen++; 3734 jj++; 3735 } 3736 olens[i] = olen; 3737 dlens[i] = dlen; 3738 } 3739 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3740 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3741 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3742 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3743 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3744 ierr = PetscFree(dlens);CHKERRQ(ierr); 3745 } else { 3746 PetscInt ml,nl; 3747 3748 M = *newmat; 3749 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3750 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3751 ierr = 
MatZeroEntries(M);CHKERRQ(ierr); 3752 /* 3753 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3754 rather than the slower MatSetValues(). 3755 */ 3756 M->was_assembled = PETSC_TRUE; 3757 M->assembled = PETSC_FALSE; 3758 } 3759 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3760 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3761 ii = aij->i; 3762 jj = aij->j; 3763 aa = aij->a; 3764 for (i=0; i<m; i++) { 3765 row = rstart + i; 3766 nz = ii[i+1] - ii[i]; 3767 cwork = jj; jj += nz; 3768 vwork = aa; aa += nz; 3769 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3770 } 3771 3772 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3773 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3774 *newmat = M; 3775 3776 /* save submatrix used in processor for next request */ 3777 if (call == MAT_INITIAL_MATRIX) { 3778 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3779 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3780 } 3781 PetscFunctionReturn(0); 3782 } 3783 3784 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3785 { 3786 PetscInt m,cstart, cend,j,nnz,i,d; 3787 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3788 const PetscInt *JJ; 3789 PetscScalar *values; 3790 PetscErrorCode ierr; 3791 PetscBool nooffprocentries; 3792 3793 PetscFunctionBegin; 3794 if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3795 3796 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3797 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3798 m = B->rmap->n; 3799 cstart = B->cmap->rstart; 3800 cend = B->cmap->rend; 3801 rstart = B->rmap->rstart; 3802 3803 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3804 3805 #if defined(PETSC_USE_DEBUG) 3806 for (i=0; i<m; i++) { 3807 nnz = Ii[i+1]- Ii[i]; 3808 JJ = J + Ii[i]; 3809 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3810 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3811 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3812 } 3813 #endif 3814 3815 for (i=0; i<m; i++) { 3816 nnz = Ii[i+1]- Ii[i]; 3817 JJ = J + Ii[i]; 3818 nnz_max = PetscMax(nnz_max,nnz); 3819 d = 0; 3820 for (j=0; j<nnz; j++) { 3821 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3822 } 3823 d_nnz[i] = d; 3824 o_nnz[i] = nnz - d; 3825 } 3826 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3827 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3828 3829 if (v) values = (PetscScalar*)v; 3830 else { 3831 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3832 } 3833 3834 for (i=0; i<m; i++) { 3835 ii = i + rstart; 3836 nnz = Ii[i+1]- Ii[i]; 3837 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? 
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
  }
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  B->nooffprocentries = nooffprocentries;

  if (!v) {
    ierr = PetscFree(values);CHKERRQ(ierr);
  }
  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero)
-  v - optional values in the matrix

   Level: developer

   Notes:
       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of v[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

       The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
    as shown

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1 = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1 = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
          MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[],const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
           For matrices that will be factored, you must leave room for (and set)
           the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)) is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.
   See Users-Manual: ch_mat for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an m x n matrix.  In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square.  The remaining portion of the local
   submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain
   the local rows in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - this value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
       it calculated if N is given); for square matrices n is almost always m
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
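
   A minimal calling sketch for process 0 of the two-process example shown
   below (the sizes are those of that example and otherwise arbitrary):

$      Mat         A;
$      PetscInt    i[] = {0,1,3},j[] = {0,0,2};
$      PetscScalar v[] = {1.0,2.0,3.0};
$      ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);CHKERRQ(ierr);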

       The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
    as shown

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1 = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1 = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given);
       this value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax
.  n - this value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
       it calculated if N is given); for square matrices n is almost always m
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL, if o_nz is used to specify the nonzero
           structure.
The size of this array is equal to the number
           of local rows, i.e. 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2,... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the m rows and n columns owned by
   the given processor, i.e. the diagonal submatrix on process 0 is
   [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism
.vb
     MatCreate(...,&A);
     MatSetType(A,MATMPIAIJ);
     MatSetSizes(A, m,n,M,N);
     MatMPIAIJSetPreallocation(A,...);
.ve

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
-  -mat_inode_limit <limit> - Sets inode limit (max limit=5)

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows.
   This division can be shown as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.
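
   For instance, proc0 of the example above could create its part of the matrix
   with exact preallocation as follows (the other processes make the same call
   with their own 'm', 'n', d_nnz, and o_nnz):

.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
     ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve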
4286 4287 Level: intermediate 4288 4289 .keywords: matrix, aij, compressed row, sparse, parallel 4290 4291 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4292 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4293 @*/ 4294 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4295 { 4296 PetscErrorCode ierr; 4297 PetscMPIInt size; 4298 4299 PetscFunctionBegin; 4300 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4301 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4302 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4303 if (size > 1) { 4304 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4305 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4306 } else { 4307 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4308 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4309 } 4310 PetscFunctionReturn(0); 4311 } 4312 4313 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4314 { 4315 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4316 PetscBool flg; 4317 PetscErrorCode ierr; 4318 4319 PetscFunctionBegin; 4320 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr); 4321 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4322 if (Ad) *Ad = a->A; 4323 if (Ao) *Ao = a->B; 4324 if (colmap) *colmap = a->garray; 4325 PetscFunctionReturn(0); 4326 } 4327 4328 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4329 { 4330 PetscErrorCode ierr; 4331 PetscInt m,N,i,rstart,nnz,Ii; 4332 PetscInt *indx; 4333 PetscScalar *values; 4334 4335 PetscFunctionBegin; 4336 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4337 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4338 PetscInt *dnz,*onz,sum,bs,cbs; 4339 4340 if (n == PETSC_DECIDE) { 4341 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4342 } 4343 /* Check sum(n) = N */ 4344 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4345 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4346 4347 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4348 rstart -= m; 4349 4350 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4351 for (i=0; i<m; i++) { 4352 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4353 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4354 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4355 } 4356 4357 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4358 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4359 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4360 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4361 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4362 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4363 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4364 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4365 } 4366 4367 /* numeric phase */ 4368 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4369 for (i=0; i<m; i++) { 4370 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4371 Ii = i + rstart; 4372 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 

PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
  if (Ad) *Ad = a->A;
  if (Ao) *Ao = a->B;
  if (colmap) *colmap = a->garray;
  PetscFunctionReturn(0);
}

PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt *dnz,*onz,sum,bs,cbs;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    ierr    = MPI_Scan(&m,&rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= m;

    ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
      ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
    }

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  }

  /* numeric phase */
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatFileSplit(Mat A,char *outfile)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscInt          m,N,i,rstart,nnz;
  size_t            len;
  const PetscInt    *indx;
  PetscViewer       out;
  char              *name;
  Mat               B;
  const PetscScalar *values;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
  ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
  /* Should this be the type of the diagonal block of A? */
  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
    ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
  ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
  ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
  sprintf(name,"%s.%d",outfile,rank);
  ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
  ierr = MatView(B,out);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
{
  PetscErrorCode      ierr;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
    ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->bi);CHKERRQ(ierr);
    ierr = PetscFree(merge->bj);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
    ierr = PetscFree(merge->coi);CHKERRQ(ierr);
    ierr = PetscFree(merge->coj);CHKERRQ(ierr);
    ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
    ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
    ierr = PetscFree(merge);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
  }
  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include <../src/mat/utils/freespace.h>
#include <petscbt.h>

PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode      ierr;
  MPI_Comm            comm;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  MatScalar           *aa=a->a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k];                 /* beginning of k-th received i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;             /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode      ierr;
  Mat                 B_mpi;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt            len,proc,*dnz,*onz,bs,cbs;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space=NULL,current_space=NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);

  /* make sure it is a PETSc comm */
  ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscNew(&merge);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);

  /* determine row ownership */
  /*---------------------------------------------------------*/
  ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
  ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);

  m      = merge->rowmap->n;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len          = 0; /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++) {
    len_si[proc] = 0;
    if (proc == rank) {
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      for (i=owners[proc]; i<owners[proc+1]; i++) {
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len         += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*--------------------------------*/
  ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);

  for (proc=0, k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++) {
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc];         /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}

  ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++) {
    ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);

  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k];                 /* beginning of k-th received i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow  = owners[rank] + i;
    anzi  = ai[arow+1] - ai[arow];
    aj    = a->j + ai[arow];
    ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi  = *(nextai[k]+1) - *nextai[k];
        aj    = buf_rj[k] + *nextai[k];
        ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = NULL;
  merge->coj          = NULL;
  merge->owners_co    = NULL;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);

  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
      MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding the sequential
        matrices from each processor

  Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix on each process
.    m - number of local rows (or PETSC_DECIDE)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
    } else {
      ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
    ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
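
/*
   Example usage of MatCreateMPIAIJSumSeqAIJ() (a minimal sketch, not taken from the PETSc
   manual; it assumes each rank has assembled a SeqAIJ matrix "seqmat" of identical global
   dimensions):

.vb
   Mat mpimat;
   ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
   (after updating the values of seqmat with the same nonzero pattern, the symbolic work
    can be reused by calling again with MAT_REUSE_MATRIX)
   ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
.ve
*/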

/*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where mlocal is the local row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize()

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()

@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a,*b;
  PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
  MatScalar      *aa,*ba,*cam;
  PetscScalar    *ca;
  PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool      match;
  MPI_Comm       comm;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);

  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  a  = (Mat_SeqAIJ*)(mpimat->A)->data;
  b  = (Mat_SeqAIJ*)(mpimat->B)->data;
  ai = a->i; aj = a->j; bi = b->i; bj = b->j;
  aa = a->a; ba = b->a;
  if (scall == MAT_INITIAL_MATRIX) {
    if (size == 1) {
      ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }

    ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++) {
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
    ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
    k    = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    ci  = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
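
/*
   Example usage of MatMPIAIJGetLocalMat() (a minimal sketch, not taken from the PETSc
   manual; A is assumed to be an assembled MATMPIAIJ matrix):

.vb
   Mat A_loc;
   ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
   (work with the mlocal-by-N sequential matrix A_loc)
   (after the values of A change but not its nonzero pattern, refresh with MAT_REUSE_MATRIX)
   ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
   ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
*/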

/*@C
     MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or NULL)

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()

@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row) {
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr   = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr   = PetscFree(aloc);CHKERRQ(ierr);
  if (!row) {
    ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
  }
  if (!col) {
    ierr = ISDestroy(&iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in MPIAIJ format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or NULL)

   Output Parameters:
+    rowb, colb - index sets of rows and columns of B to extract
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq=NULL;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb  = *rowb; iscolb = *colb;
    ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr   = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb) {
    ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
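
/*
   Example usage of MatGetBrowsOfAcols() (a minimal sketch, not taken from the PETSc manual;
   A and B are assumed to be MATMPIAIJ matrices with B's row layout matching A's column
   layout, as in a product C = A*B):

.vb
   IS  rowb = NULL,colb = NULL;
   Mat B_seq;
   ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
   (rowb and colb are returned so the same extraction can be repeated cheaply)
   ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
   ierr = ISDestroy(&rowb);CHKERRQ(ierr);
   ierr = ISDestroy(&colb);CHKERRQ(ierr);
   ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
.ve
*/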

/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in MPIAIJ format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameters:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
-    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

    Level: developer

*/
PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
{
  VecScatter_MPI_General *gen_to,*gen_from;
  PetscErrorCode         ierr;
  Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ             *b_oth;
  VecScatter             ctx;
  MPI_Comm               comm;
  PetscMPIInt            *rprocs,*sprocs,tag,rank;
  PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
  PetscInt               *rvalues,*svalues,*cols,sbs,rbs;
  PetscScalar            *b_otha,*bufa,*bufA,*vals;
  PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
  MPI_Request            *rwaits = NULL,*swaits = NULL;
  MPI_Status             *sstatus,rstatus;
  PetscMPIInt            jj,size;
  VecScatterType         type;
  PetscBool              mpi1;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  if (size == 1) {
    startsj_s = NULL;
    bufa_ptr  = NULL;
    *B_oth    = NULL;
    PetscFunctionReturn(0);
  }

  ctx  = a->Mvctx;
  ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr);
  ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr);
  if (!mpi1) {
    /* a->Mvctx is not type MPI1, which is not implemented for Mat-Mat ops,
       thus create a->Mvctx_mpi1 */
    if (!a->Mvctx_mpi1) {
      a->Mvctx_mpi1_flg = PETSC_TRUE;
      ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr);
    }
    ctx = a->Mvctx_mpi1;
  }
  tag = ((PetscObject)ctx)->tag;

  gen_to   = (VecScatter_MPI_General*)ctx->todata;
  gen_from = (VecScatter_MPI_General*)ctx->fromdata;
  nrecvs   = gen_from->n;
  nsends   = gen_to->n;

  ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
  srow    = gen_to->indices; /* local row index to be sent */
  sstarts = gen_to->starts;
  sprocs  = gen_to->procs;
  sstatus = gen_to->sstatus;
  sbs     = gen_to->bs;
  rstarts = gen_from->starts;
  rprocs  = gen_from->procs;
  rbs     = gen_from->bs;

  if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX) {
    /* i-array */
    /*---------*/
    /* post receives */
    ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
    for (i=0; i<nrecvs; i++) {
      rowlen = rvalues + rstarts[i]*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
      ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message */
    ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);

    sstartsj[0] = 0;
    rstartsj[0] = 0;
    len         = 0; /* total length of j or a array to be sent */
    k           = 0;
    ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
    for (i=0; i<nsends; i++) {
      rowlen = svalues + sstarts[i]*sbs;
      nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l=0; l<sbs; l++) {
          ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */

          rowlen[j*sbs+l] = ncols;

          len += ncols;
          ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
        }
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);

      sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
    ierr = PetscFree(svalues);CHKERRQ(ierr);

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);

    b_othi[0] = 0;
    len       = 0; /* total length of j or a array to be received */
    k         = 0;
    for (i=0; i<nrecvs; i++) {
      rowlen = rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
        k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }
    ierr = PetscFree(rvalues);CHKERRQ(ierr);

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++) {
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++) {
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++) {
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++) {
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX) {
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++) {
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++) {
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++) {
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++) {
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj_s || !bufa_ptr) {
      ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
    } else {
      *startsj_s = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
  MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

  Not Collective

  Input Parameter:
. A - The matrix in MPIAIJ format

  Output Parameters:
+ lvec - The local vector holding off-process values from the argument to a matrix-vector product
. colmap - A map from global column index to local index into lvec
- multScatter - A scatter from the argument of a matrix-vector product to lvec

  Level: developer

@*/
#if defined(PETSC_USE_CTABLE)
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ*) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}
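
/*
   Example usage of MatGetCommunicationStructs() (a minimal sketch, not taken from the PETSc
   manual; shown for the non-PETSC_USE_CTABLE branch, where colmap is a plain integer array):

.vb
   Vec        lvec;
   PetscInt   *colmap;
   VecScatter Mvctx;
   ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
   (lvec, colmap, and Mvctx are borrowed references owned by A; do not destroy them)
.ve
*/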

PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_MKL_SPARSE)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif
#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);

/*
    Computes (B'*A')' since computing B*A directly is untenable

                 n            p           p
              (     )      (     )     (     )
            m (  A  )  * n (  B  ) = m (  C  )
              (     )      (     )     (     )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatDestroy(&Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;

  *C = Cmat;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateAIJ()
M*/
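
/*
   Example of selecting this type from the options database (a minimal sketch, not taken
   from the PETSc manual; M and N are assumed global sizes):

.vb
   Mat A;
   ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
   ierr = MatSetUp(A);CHKERRQ(ierr);
   (run with -mat_type mpiaij; on a communicator with more than one process the generic
    "aij" type also resolves to "mpiaij")
.ve
*/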

PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);

  ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data       = (void*)b;
  ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSP/CUSPARSE classes */
  b->spptr = NULL;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
#if defined(PETSC_HAVE_MKL_SPARSE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
#endif
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices for "diagonal" portion of matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.

       The i and j indices are 0 based

       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
       legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
       not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
       the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
       keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
       communication if it is known that only local entries will be set.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
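
/*
   Example usage of MatCreateMPIAIJWithSplitArrays() (a minimal sketch, not taken from the
   PETSc manual; i,j,a and oi,oj,oa are placeholder names for user-owned CSR arrays holding
   the "diagonal" and "off-diagonal" blocks described above):

.vb
   Mat A;
   ierr = MatCreateMPIAIJWithSplitArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                         i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
   (the arrays are NOT copied; free them only after MatDestroy(&A))
.ve
*/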

/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so can be used in void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat        A                 = aij->A;
    Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa               = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B                 = aij->B;
    Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba               = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
          else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                ba    = b->a;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}