1 2 3 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 4 #include <petsc/private/vecimpl.h> 5 #include <petsc/private/isimpl.h> 6 #include <petscblaslapack.h> 7 #include <petscsf.h> 8 9 /*MC 10 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 11 12 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 13 and MATMPIAIJ otherwise. As a result, for single process communicators, 14 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 15 for communicators controlling multiple processes. It is recommended that you call both of 16 the above preallocation routines for simplicity. 17 18 Options Database Keys: 19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 20 21 Developer Notes: 22 Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when 23 enough exist. 24 25 Level: beginner 26 27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ 28 M*/ 29 30 /*MC 31 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 32 33 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 34 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 35 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 36 for communicators controlling multiple processes. It is recommended that you call both of 37 the above preallocation routines for simplicity. 38 39 Options Database Keys: 40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 41 42 Level: beginner 43 44 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 45 M*/ 46 47 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 48 { 49 PetscErrorCode ierr; 50 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 51 52 PetscFunctionBegin; 53 if (mat->A) { 54 ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr); 55 ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr); 56 } 57 PetscFunctionReturn(0); 58 } 59 60 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 61 { 62 PetscErrorCode ierr; 63 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 64 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 65 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 66 const PetscInt *ia,*ib; 67 const MatScalar *aa,*bb; 68 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 69 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 70 71 PetscFunctionBegin; 72 *keptrows = 0; 73 ia = a->i; 74 ib = b->i; 75 for (i=0; i<m; i++) { 76 na = ia[i+1] - ia[i]; 77 nb = ib[i+1] - ib[i]; 78 if (!na && !nb) { 79 cnt++; 80 goto ok1; 81 } 82 aa = a->a + ia[i]; 83 for (j=0; j<na; j++) { 84 if (aa[j] != 0.0) goto ok1; 85 } 86 bb = b->a + ib[i]; 87 for (j=0; j <nb; j++) { 88 if (bb[j] != 0.0) goto ok1; 89 } 90 cnt++; 91 ok1:; 92 } 93 ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 94 if (!n0rows) PetscFunctionReturn(0); 95 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 96 cnt = 0; 97 for (i=0; i<m; i++) { 98 na = ia[i+1] - ia[i]; 99 nb = ib[i+1] - ib[i]; 100 if (!na && !nb) continue; 101 aa = a->a + ia[i]; 102 for (j=0; j<na;j++) { 103 if (aa[j] != 0.0) { 104 rows[cnt++] = rstart + i; 105 goto ok2; 106 } 107 } 108 bb = b->a + ib[i]; 109 for (j=0; j<nb; j++) { 110 if (bb[j] != 0.0) { 111 rows[cnt++] = rstart + i; 112 goto ok2; 113 } 
114 } 115 ok2:; 116 } 117 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 121 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 122 { 123 PetscErrorCode ierr; 124 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 125 126 PetscFunctionBegin; 127 if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) { 128 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 129 } else { 130 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 131 } 132 PetscFunctionReturn(0); 133 } 134 135 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 136 { 137 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 138 PetscErrorCode ierr; 139 PetscInt i,rstart,nrows,*rows; 140 141 PetscFunctionBegin; 142 *zrows = NULL; 143 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 144 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 145 for (i=0; i<nrows; i++) rows[i] += rstart; 146 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 147 PetscFunctionReturn(0); 148 } 149 150 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 151 { 152 PetscErrorCode ierr; 153 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 154 PetscInt i,n,*garray = aij->garray; 155 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 156 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 157 PetscReal *work; 158 159 PetscFunctionBegin; 160 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 161 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 162 if (type == NORM_2) { 163 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 164 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 165 } 166 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 167 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 168 } 169 } else if (type == NORM_1) { 170 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 171 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 172 } 173 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 174 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 175 } 176 } else if (type == NORM_INFINITY) { 177 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 178 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 179 } 180 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 181 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 182 } 183 184 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 185 if (type == NORM_INFINITY) { 186 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 187 } else { 188 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 189 } 190 ierr = PetscFree(work);CHKERRQ(ierr); 191 if (type == NORM_2) { 192 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 193 } 194 PetscFunctionReturn(0); 195 } 196 197 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 198 { 199 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 200 IS sis,gis; 201 PetscErrorCode ierr; 202 const PetscInt *isis,*igis; 203 PetscInt n,*iis,nsis,ngis,rstart,i; 204 205 PetscFunctionBegin; 206 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 207 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 208 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 209 ierr = 
ISGetSize(sis,&nsis);CHKERRQ(ierr); 210 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 211 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 212 213 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 214 ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr); 215 ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr); 216 n = ngis + nsis; 217 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 218 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 219 for (i=0; i<n; i++) iis[i] += rstart; 220 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 221 222 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 223 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 224 ierr = ISDestroy(&sis);CHKERRQ(ierr); 225 ierr = ISDestroy(&gis);CHKERRQ(ierr); 226 PetscFunctionReturn(0); 227 } 228 229 /* 230 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 231 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 232 233 Only for square matrices 234 235 Used by a preconditioner, hence PETSC_EXTERN 236 */ 237 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 238 { 239 PetscMPIInt rank,size; 240 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 241 PetscErrorCode ierr; 242 Mat mat; 243 Mat_SeqAIJ *gmata; 244 PetscMPIInt tag; 245 MPI_Status status; 246 PetscBool aij; 247 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 248 249 PetscFunctionBegin; 250 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 251 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 252 if (!rank) { 253 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 254 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 255 } 256 if (reuse == MAT_INITIAL_MATRIX) { 257 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 258 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 259 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 260 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 261 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 262 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 263 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 264 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 265 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 266 267 rowners[0] = 0; 268 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 269 rstart = rowners[rank]; 270 rend = rowners[rank+1]; 271 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 272 if (!rank) { 273 gmata = (Mat_SeqAIJ*) gmat->data; 274 /* send row lengths to all processors */ 275 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 276 for (i=1; i<size; i++) { 277 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 278 } 279 /* determine number diagonal and off-diagonal counts */ 280 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 281 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 282 jj = 0; 283 for (i=0; i<m; i++) { 284 for (j=0; j<dlens[i]; j++) { 285 if (gmata->j[jj] < rstart) ld[i]++; 286 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 287 jj++; 288 } 289 } 290 /* send column indices to other processes */ 291 for (i=1; i<size; i++) { 292 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 293 ierr = 
MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 294 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 295 } 296 297 /* send numerical values to other processes */ 298 for (i=1; i<size; i++) { 299 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 300 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 301 } 302 gmataa = gmata->a; 303 gmataj = gmata->j; 304 305 } else { 306 /* receive row lengths */ 307 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 308 /* receive column indices */ 309 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 310 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 311 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 312 /* determine number diagonal and off-diagonal counts */ 313 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 314 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 315 jj = 0; 316 for (i=0; i<m; i++) { 317 for (j=0; j<dlens[i]; j++) { 318 if (gmataj[jj] < rstart) ld[i]++; 319 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 320 jj++; 321 } 322 } 323 /* receive numerical values */ 324 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 325 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 326 } 327 /* set preallocation */ 328 for (i=0; i<m; i++) { 329 dlens[i] -= olens[i]; 330 } 331 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 332 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 333 334 for (i=0; i<m; i++) { 335 dlens[i] += olens[i]; 336 } 337 cnt = 0; 338 for (i=0; i<m; i++) { 339 row = rstart + i; 340 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 341 cnt += dlens[i]; 342 } 343 if (rank) { 344 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 345 } 346 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 347 ierr = PetscFree(rowners);CHKERRQ(ierr); 348 349 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 350 351 *inmat = mat; 352 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 353 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 354 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 355 mat = *inmat; 356 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 357 if (!rank) { 358 /* send numerical values to other processes */ 359 gmata = (Mat_SeqAIJ*) gmat->data; 360 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 361 gmataa = gmata->a; 362 for (i=1; i<size; i++) { 363 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 364 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 365 } 366 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 367 } else { 368 /* receive numerical values from process 0*/ 369 nz = Ad->nz + Ao->nz; 370 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 371 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 372 } 373 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 374 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 375 ad = Ad->a; 376 ao = Ao->a; 377 if (mat->rmap->n) { 378 i = 0; 379 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 380 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 381 } 382 for (i=1; i<mat->rmap->n; i++) { 
383 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 384 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 385 } 386 i--; 387 if (mat->rmap->n) { 388 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 389 } 390 if (rank) { 391 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 392 } 393 } 394 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 395 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 396 PetscFunctionReturn(0); 397 } 398 399 /* 400 Local utility routine that creates a mapping from the global column 401 number to the local number in the off-diagonal part of the local 402 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 403 a slightly higher hash table cost; without it it is not scalable (each processor 404 has an order N integer array but is fast to acess. 405 */ 406 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 407 { 408 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 409 PetscErrorCode ierr; 410 PetscInt n = aij->B->cmap->n,i; 411 412 PetscFunctionBegin; 413 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 414 #if defined(PETSC_USE_CTABLE) 415 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 416 for (i=0; i<n; i++) { 417 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 418 } 419 #else 420 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 421 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 422 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 423 #endif 424 PetscFunctionReturn(0); 425 } 426 427 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 428 { \ 429 if (col <= lastcol1) low1 = 0; \ 430 else high1 = nrow1; \ 431 lastcol1 = col;\ 432 while (high1-low1 > 5) { \ 433 t = (low1+high1)/2; \ 434 if (rp1[t] > col) high1 = t; \ 435 else low1 = t; \ 436 } \ 437 for (_i=low1; _i<high1; _i++) { \ 438 if (rp1[_i] > col) break; \ 439 if (rp1[_i] == col) { \ 440 if (addv == ADD_VALUES) ap1[_i] += value; \ 441 else ap1[_i] = value; \ 442 goto a_noinsert; \ 443 } \ 444 } \ 445 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 446 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 447 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 448 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 449 N = nrow1++ - 1; a->nz++; high1++; \ 450 /* shift up all the later entries in this row */ \ 451 for (ii=N; ii>=_i; ii--) { \ 452 rp1[ii+1] = rp1[ii]; \ 453 ap1[ii+1] = ap1[ii]; \ 454 } \ 455 rp1[_i] = col; \ 456 ap1[_i] = value; \ 457 A->nonzerostate++;\ 458 a_noinsert: ; \ 459 ailen[row] = nrow1; \ 460 } 461 462 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 463 { \ 464 if (col <= lastcol2) low2 = 0; \ 465 else high2 = nrow2; \ 466 lastcol2 = col; \ 467 while (high2-low2 > 5) { \ 468 t = (low2+high2)/2; \ 469 if (rp2[t] > col) high2 = t; \ 470 else low2 = t; \ 471 } \ 472 for (_i=low2; _i<high2; _i++) { \ 473 if (rp2[_i] > col) break; \ 474 if (rp2[_i] == col) { \ 475 if (addv == ADD_VALUES) ap2[_i] += value; \ 476 else ap2[_i] 
= value; \ 477 goto b_noinsert; \ 478 } \ 479 } \ 480 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 481 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 482 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 483 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 484 N = nrow2++ - 1; b->nz++; high2++; \ 485 /* shift up all the later entries in this row */ \ 486 for (ii=N; ii>=_i; ii--) { \ 487 rp2[ii+1] = rp2[ii]; \ 488 ap2[ii+1] = ap2[ii]; \ 489 } \ 490 rp2[_i] = col; \ 491 ap2[_i] = value; \ 492 B->nonzerostate++; \ 493 b_noinsert: ; \ 494 bilen[row] = nrow2; \ 495 } 496 497 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 498 { 499 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 500 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 501 PetscErrorCode ierr; 502 PetscInt l,*garray = mat->garray,diag; 503 504 PetscFunctionBegin; 505 /* code only works for square matrices A */ 506 507 /* find size of row to the left of the diagonal part */ 508 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 509 row = row - diag; 510 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 511 if (garray[b->j[b->i[row]+l]] > diag) break; 512 } 513 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 514 515 /* diagonal part */ 516 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 517 518 /* right of diagonal part */ 519 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 520 PetscFunctionReturn(0); 521 } 522 523 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 524 { 525 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 526 PetscScalar value; 527 PetscErrorCode ierr; 528 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 529 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 530 PetscBool roworiented = aij->roworiented; 531 532 /* Some Variables required in the macro */ 533 Mat A = aij->A; 534 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 535 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 536 MatScalar *aa = a->a; 537 PetscBool ignorezeroentries = a->ignorezeroentries; 538 Mat B = aij->B; 539 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 540 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 541 MatScalar *ba = b->a; 542 543 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 544 PetscInt nonew; 545 MatScalar *ap1,*ap2; 546 547 PetscFunctionBegin; 548 for (i=0; i<m; i++) { 549 if (im[i] < 0) continue; 550 #if defined(PETSC_USE_DEBUG) 551 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 552 #endif 553 if (im[i] >= rstart && im[i] < rend) { 554 row = im[i] - rstart; 555 lastcol1 = -1; 556 rp1 = aj + ai[row]; 557 ap1 = aa + ai[row]; 558 rmax1 = aimax[row]; 559 nrow1 = ailen[row]; 560 low1 = 0; 561 high1 = nrow1; 562 lastcol2 = -1; 563 rp2 = bj + bi[row]; 564 ap2 = ba + bi[row]; 565 rmax2 = bimax[row]; 566 nrow2 = bilen[row]; 567 low2 = 0; 568 high2 = nrow2; 569 570 for (j=0; j<n; j++) { 571 if (roworiented) value = v[i*n+j]; 572 else value = v[i+j*m]; 
573 if (in[j] >= cstart && in[j] < cend) { 574 col = in[j] - cstart; 575 nonew = a->nonew; 576 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 577 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 578 } else if (in[j] < 0) continue; 579 #if defined(PETSC_USE_DEBUG) 580 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 581 #endif 582 else { 583 if (mat->was_assembled) { 584 if (!aij->colmap) { 585 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 586 } 587 #if defined(PETSC_USE_CTABLE) 588 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 589 col--; 590 #else 591 col = aij->colmap[in[j]] - 1; 592 #endif 593 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 594 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 595 col = in[j]; 596 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 597 B = aij->B; 598 b = (Mat_SeqAIJ*)B->data; 599 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 600 rp2 = bj + bi[row]; 601 ap2 = ba + bi[row]; 602 rmax2 = bimax[row]; 603 nrow2 = bilen[row]; 604 low2 = 0; 605 high2 = nrow2; 606 bm = aij->B->rmap->n; 607 ba = b->a; 608 } else if (col < 0) { 609 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 610 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 611 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 612 } 613 } else col = in[j]; 614 nonew = b->nonew; 615 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 616 } 617 } 618 } else { 619 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 620 if (!aij->donotstash) { 621 mat->assembled = PETSC_FALSE; 622 if (roworiented) { 623 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 624 } else { 625 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 626 } 627 } 628 } 629 } 630 PetscFunctionReturn(0); 631 } 632 633 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 634 { 635 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 636 PetscErrorCode ierr; 637 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 638 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 639 640 PetscFunctionBegin; 641 for (i=0; i<m; i++) { 642 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 643 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 644 if (idxm[i] >= rstart && idxm[i] < rend) { 645 row = idxm[i] - rstart; 646 for (j=0; j<n; j++) { 647 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 648 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 649 if (idxn[j] >= cstart && idxn[j] < cend) { 650 col = idxn[j] - cstart; 651 ierr = 
MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 652 } else { 653 if (!aij->colmap) { 654 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 655 } 656 #if defined(PETSC_USE_CTABLE) 657 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 658 col--; 659 #else 660 col = aij->colmap[idxn[j]] - 1; 661 #endif 662 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 663 else { 664 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 665 } 666 } 667 } 668 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 669 } 670 PetscFunctionReturn(0); 671 } 672 673 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 674 675 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 676 { 677 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 678 PetscErrorCode ierr; 679 PetscInt nstash,reallocs; 680 681 PetscFunctionBegin; 682 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 683 684 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 685 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 686 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 687 PetscFunctionReturn(0); 688 } 689 690 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 691 { 692 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 693 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 694 PetscErrorCode ierr; 695 PetscMPIInt n; 696 PetscInt i,j,rstart,ncols,flg; 697 PetscInt *row,*col; 698 PetscBool other_disassembled; 699 PetscScalar *val; 700 701 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 702 703 PetscFunctionBegin; 704 if (!aij->donotstash && !mat->nooffprocentries) { 705 while (1) { 706 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 707 if (!flg) break; 708 709 for (i=0; i<n; ) { 710 /* Now identify the consecutive vals belonging to the same row */ 711 for (j=i,rstart=row[j]; j<n; j++) { 712 if (row[j] != rstart) break; 713 } 714 if (j < n) ncols = j-i; 715 else ncols = n-i; 716 /* Now assemble all these values with a single function call */ 717 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 718 719 i = j; 720 } 721 } 722 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 723 } 724 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 725 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 726 727 /* determine if any processor has disassembled, if so we must 728 also disassemble ourselfs, in order that we may reassemble. 
*/ 729 /* 730 if nonzero structure of submatrix B cannot change then we know that 731 no processor disassembled thus we can skip this stuff 732 */ 733 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 734 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 735 if (mat->was_assembled && !other_disassembled) { 736 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 737 } 738 } 739 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 740 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 741 } 742 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 743 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 744 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 745 746 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 747 748 aij->rowvalues = 0; 749 750 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 751 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 752 753 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 754 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 755 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 756 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 757 } 758 PetscFunctionReturn(0); 759 } 760 761 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 762 { 763 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 764 PetscErrorCode ierr; 765 766 PetscFunctionBegin; 767 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 768 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 769 PetscFunctionReturn(0); 770 } 771 772 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 773 { 774 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 775 PetscInt *lrows; 776 PetscInt r, len; 777 PetscErrorCode ierr; 778 779 PetscFunctionBegin; 780 /* get locally owned rows */ 781 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 782 /* fix right hand side if needed */ 783 if (x && b) { 784 const PetscScalar *xx; 785 PetscScalar *bb; 786 787 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 788 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 789 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 790 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 791 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 792 } 793 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 794 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 795 if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */ 796 PetscBool cong; 797 ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr); 798 if (cong) A->congruentlayouts = 1; 799 else A->congruentlayouts = 0; 800 } 801 if ((diag != 0.0) && A->congruentlayouts) { 802 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 803 } else if (diag != 0.0) { 804 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 805 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 806 for (r = 0; r < len; ++r) { 807 const PetscInt row = lrows[r] + A->rmap->rstart; 808 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 809 } 810 
ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 811 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 812 } else { 813 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 814 } 815 ierr = PetscFree(lrows);CHKERRQ(ierr); 816 817 /* only change matrix nonzero state if pattern was allowed to be changed */ 818 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 819 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 820 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 821 } 822 PetscFunctionReturn(0); 823 } 824 825 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 826 { 827 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 828 PetscErrorCode ierr; 829 PetscMPIInt n = A->rmap->n; 830 PetscInt i,j,r,m,p = 0,len = 0; 831 PetscInt *lrows,*owners = A->rmap->range; 832 PetscSFNode *rrows; 833 PetscSF sf; 834 const PetscScalar *xx; 835 PetscScalar *bb,*mask; 836 Vec xmask,lmask; 837 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 838 const PetscInt *aj, *ii,*ridx; 839 PetscScalar *aa; 840 841 PetscFunctionBegin; 842 /* Create SF where leaves are input rows and roots are owned rows */ 843 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 844 for (r = 0; r < n; ++r) lrows[r] = -1; 845 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 846 for (r = 0; r < N; ++r) { 847 const PetscInt idx = rows[r]; 848 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 849 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 850 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 851 } 852 rrows[r].rank = p; 853 rrows[r].index = rows[r] - owners[p]; 854 } 855 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 856 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 857 /* Collect flags for rows to be zeroed */ 858 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 859 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 860 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 861 /* Compress and put in row numbers */ 862 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 863 /* zero diagonal part of matrix */ 864 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 865 /* handle off diagonal part of matrix */ 866 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 867 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 868 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 869 for (i=0; i<len; i++) bb[lrows[i]] = 1; 870 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 871 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 872 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 873 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 874 if (x) { 875 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 876 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 877 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 878 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 879 } 880 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 881 /* remove zeroed rows of off diagonal matrix */ 882 ii = aij->i; 883 for (i=0; i<len; i++) { 884 ierr = PetscMemzero(aij->a + 
ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 885 } 886 /* loop over all elements of off process part of matrix zeroing removed columns*/ 887 if (aij->compressedrow.use) { 888 m = aij->compressedrow.nrows; 889 ii = aij->compressedrow.i; 890 ridx = aij->compressedrow.rindex; 891 for (i=0; i<m; i++) { 892 n = ii[i+1] - ii[i]; 893 aj = aij->j + ii[i]; 894 aa = aij->a + ii[i]; 895 896 for (j=0; j<n; j++) { 897 if (PetscAbsScalar(mask[*aj])) { 898 if (b) bb[*ridx] -= *aa*xx[*aj]; 899 *aa = 0.0; 900 } 901 aa++; 902 aj++; 903 } 904 ridx++; 905 } 906 } else { /* do not use compressed row format */ 907 m = l->B->rmap->n; 908 for (i=0; i<m; i++) { 909 n = ii[i+1] - ii[i]; 910 aj = aij->j + ii[i]; 911 aa = aij->a + ii[i]; 912 for (j=0; j<n; j++) { 913 if (PetscAbsScalar(mask[*aj])) { 914 if (b) bb[i] -= *aa*xx[*aj]; 915 *aa = 0.0; 916 } 917 aa++; 918 aj++; 919 } 920 } 921 } 922 if (x) { 923 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 924 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 925 } 926 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 927 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 928 ierr = PetscFree(lrows);CHKERRQ(ierr); 929 930 /* only change matrix nonzero state if pattern was allowed to be changed */ 931 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 932 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 933 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 934 } 935 PetscFunctionReturn(0); 936 } 937 938 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 939 { 940 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 941 PetscErrorCode ierr; 942 PetscInt nt; 943 VecScatter Mvctx = a->Mvctx; 944 945 PetscFunctionBegin; 946 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 947 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 948 949 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 950 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 951 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 952 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 953 PetscFunctionReturn(0); 954 } 955 956 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 957 { 958 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 959 PetscErrorCode ierr; 960 961 PetscFunctionBegin; 962 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 963 PetscFunctionReturn(0); 964 } 965 966 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 967 { 968 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 969 PetscErrorCode ierr; 970 VecScatter Mvctx = a->Mvctx; 971 972 PetscFunctionBegin; 973 if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1; 974 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 975 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 976 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 977 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 978 PetscFunctionReturn(0); 979 } 980 981 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 982 { 983 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 984 PetscErrorCode ierr; 985 PetscBool merged; 986 987 PetscFunctionBegin; 988 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 989 /* do nondiagonal part */ 990 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 991 if (!merged) { 992 /* send it on its way */ 
993 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 994 /* do local part */ 995 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 996 /* receive remote parts: note this assumes the values are not actually */ 997 /* added in yy until the next line, */ 998 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 999 } else { 1000 /* do local part */ 1001 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1002 /* send it on its way */ 1003 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1004 /* values actually were received in the Begin() but we need to call this nop */ 1005 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1006 } 1007 PetscFunctionReturn(0); 1008 } 1009 1010 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1011 { 1012 MPI_Comm comm; 1013 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1014 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1015 IS Me,Notme; 1016 PetscErrorCode ierr; 1017 PetscInt M,N,first,last,*notme,i; 1018 PetscMPIInt size; 1019 1020 PetscFunctionBegin; 1021 /* Easy test: symmetric diagonal block */ 1022 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1023 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1024 if (!*f) PetscFunctionReturn(0); 1025 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1026 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1027 if (size == 1) PetscFunctionReturn(0); 1028 1029 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1030 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1031 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1032 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1033 for (i=0; i<first; i++) notme[i] = i; 1034 for (i=last; i<M; i++) notme[i-last+first] = i; 1035 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1036 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1037 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1038 Aoff = Aoffs[0]; 1039 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1040 Boff = Boffs[0]; 1041 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1042 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1043 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1044 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1045 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1046 ierr = PetscFree(notme);CHKERRQ(ierr); 1047 PetscFunctionReturn(0); 1048 } 1049 1050 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1051 { 1052 PetscErrorCode ierr; 1053 1054 PetscFunctionBegin; 1055 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1056 PetscFunctionReturn(0); 1057 } 1058 1059 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1060 { 1061 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1062 PetscErrorCode ierr; 1063 1064 PetscFunctionBegin; 1065 /* do nondiagonal part */ 1066 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1067 /* send it on its way */ 1068 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1069 /* do local part */ 1070 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1071 /* receive remote parts */ 1072 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1073 
PetscFunctionReturn(0); 1074 } 1075 1076 /* 1077 This only works correctly for square matrices where the subblock A->A is the 1078 diagonal block 1079 */ 1080 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1081 { 1082 PetscErrorCode ierr; 1083 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1084 1085 PetscFunctionBegin; 1086 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1087 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1088 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1089 PetscFunctionReturn(0); 1090 } 1091 1092 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1093 { 1094 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1095 PetscErrorCode ierr; 1096 1097 PetscFunctionBegin; 1098 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1099 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1100 PetscFunctionReturn(0); 1101 } 1102 1103 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1104 { 1105 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1106 PetscErrorCode ierr; 1107 1108 PetscFunctionBegin; 1109 #if defined(PETSC_USE_LOG) 1110 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1111 #endif 1112 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1113 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1114 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1115 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1116 #if defined(PETSC_USE_CTABLE) 1117 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1118 #else 1119 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1120 #endif 1121 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1122 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1123 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1124 if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);} 1125 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1126 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1127 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1128 1129 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1130 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1131 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1132 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1133 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1134 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1135 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1136 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1137 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1138 #if defined(PETSC_HAVE_ELEMENTAL) 1139 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1140 #endif 1141 #if defined(PETSC_HAVE_HYPRE) 1142 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1143 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1144 #endif 1145 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1146 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr); 1147 PetscFunctionReturn(0); 1148 } 1149 1150 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1151 { 1152 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1153 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1154 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1155 PetscErrorCode ierr; 1156 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1157 int fd; 1158 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1159 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1160 PetscScalar *column_values; 1161 PetscInt message_count,flowcontrolcount; 1162 FILE *file; 1163 1164 PetscFunctionBegin; 1165 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1166 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1167 nz = A->nz + B->nz; 1168 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1169 if (!rank) { 1170 header[0] = MAT_FILE_CLASSID; 1171 header[1] = mat->rmap->N; 1172 header[2] = mat->cmap->N; 1173 1174 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1175 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1176 /* get largest number of rows any processor has */ 1177 rlen = mat->rmap->n; 1178 range = mat->rmap->range; 1179 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1180 } else { 1181 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1182 rlen = mat->rmap->n; 1183 } 1184 1185 /* load up the local row counts */ 1186 ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr); 1187 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1188 1189 /* store the row lengths to the file */ 1190 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1191 if (!rank) { 1192 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1193 for (i=1; i<size; i++) { 1194 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1195 rlen = range[i+1] - range[i]; 1196 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1197 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1198 } 1199 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1200 } else { 1201 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1202 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1203 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1204 } 1205 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1206 1207 /* load up the local column indices */ 1208 nzmax = nz; /* th processor needs space a largest processor needs */ 1209 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1210 ierr = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr); 1211 cnt = 0; 1212 for (i=0; i<mat->rmap->n; i++) { 1213 for (j=B->i[i]; j<B->i[i+1]; j++) { 1214 if ((col = garray[B->j[j]]) > cstart) break; 1215 column_indices[cnt++] = col; 1216 } 1217 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1218 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1219 } 1220 if (cnt != 
A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1221 1222 /* store the column indices to the file */ 1223 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1224 if (!rank) { 1225 MPI_Status status; 1226 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1227 for (i=1; i<size; i++) { 1228 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1229 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1230 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1231 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1232 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1233 } 1234 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1235 } else { 1236 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1237 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1238 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1239 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1240 } 1241 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1242 1243 /* load up the local column values */ 1244 ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr); 1245 cnt = 0; 1246 for (i=0; i<mat->rmap->n; i++) { 1247 for (j=B->i[i]; j<B->i[i+1]; j++) { 1248 if (garray[B->j[j]] > cstart) break; 1249 column_values[cnt++] = B->a[j]; 1250 } 1251 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1252 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1253 } 1254 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1255 1256 /* store the column values to the file */ 1257 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1258 if (!rank) { 1259 MPI_Status status; 1260 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1261 for (i=1; i<size; i++) { 1262 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1263 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1264 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1265 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1266 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1267 } 1268 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1269 } else { 1270 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1271 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1272 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1273 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1274 } 1275 ierr = PetscFree(column_values);CHKERRQ(ierr); 1276 1277 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1278 if (file) fprintf(file,"-matload_block_size 
%d\n",(int)PetscAbs(mat->rmap->bs)); 1279 PetscFunctionReturn(0); 1280 } 1281 1282 #include <petscdraw.h> 1283 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1284 { 1285 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1286 PetscErrorCode ierr; 1287 PetscMPIInt rank = aij->rank,size = aij->size; 1288 PetscBool isdraw,iascii,isbinary; 1289 PetscViewer sviewer; 1290 PetscViewerFormat format; 1291 1292 PetscFunctionBegin; 1293 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1294 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1295 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1296 if (iascii) { 1297 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1298 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1299 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1300 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1301 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1302 for (i=0; i<(PetscInt)size; i++) { 1303 nmax = PetscMax(nmax,nz[i]); 1304 nmin = PetscMin(nmin,nz[i]); 1305 navg += nz[i]; 1306 } 1307 ierr = PetscFree(nz);CHKERRQ(ierr); 1308 navg = navg/size; 1309 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1310 PetscFunctionReturn(0); 1311 } 1312 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1313 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1314 MatInfo info; 1315 PetscBool inodes; 1316 1317 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1318 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1319 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1320 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1321 if (!inodes) { 1322 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1323 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1324 } else { 1325 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1326 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1327 } 1328 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1329 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1330 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1331 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1332 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1333 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1334 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1335 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1336 PetscFunctionReturn(0); 1337 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1338 PetscInt inodecount,inodelimit,*inodes; 1339 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1340 if (inodes) { 1341 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is 
%D\n",inodecount,inodelimit);CHKERRQ(ierr); 1342 } else { 1343 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1344 } 1345 PetscFunctionReturn(0); 1346 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1347 PetscFunctionReturn(0); 1348 } 1349 } else if (isbinary) { 1350 if (size == 1) { 1351 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1352 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1353 } else { 1354 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1355 } 1356 PetscFunctionReturn(0); 1357 } else if (isdraw) { 1358 PetscDraw draw; 1359 PetscBool isnull; 1360 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1361 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1362 if (isnull) PetscFunctionReturn(0); 1363 } 1364 1365 { 1366 /* assemble the entire matrix onto first processor. */ 1367 Mat A; 1368 Mat_SeqAIJ *Aloc; 1369 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1370 MatScalar *a; 1371 1372 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1373 if (!rank) { 1374 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1375 } else { 1376 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1377 } 1378 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1379 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1380 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1381 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1382 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1383 1384 /* copy over the A part */ 1385 Aloc = (Mat_SeqAIJ*)aij->A->data; 1386 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1387 row = mat->rmap->rstart; 1388 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1389 for (i=0; i<m; i++) { 1390 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1391 row++; 1392 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1393 } 1394 aj = Aloc->j; 1395 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1396 1397 /* copy over the B part */ 1398 Aloc = (Mat_SeqAIJ*)aij->B->data; 1399 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1400 row = mat->rmap->rstart; 1401 ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr); 1402 ct = cols; 1403 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1404 for (i=0; i<m; i++) { 1405 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1406 row++; 1407 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1408 } 1409 ierr = PetscFree(ct);CHKERRQ(ierr); 1410 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1411 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1412 /* 1413 Everyone has to call to draw the matrix since the graphics waits are 1414 synchronized across all processors that share the PetscDraw object 1415 */ 1416 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1417 if (!rank) { 1418 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1419 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1420 } 1421 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1422 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1423 ierr = MatDestroy(&A);CHKERRQ(ierr); 1424 } 1425 PetscFunctionReturn(0); 1426 } 1427 1428 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1429 { 1430 PetscErrorCode 
ierr; 1431 PetscBool iascii,isdraw,issocket,isbinary; 1432 1433 PetscFunctionBegin; 1434 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1435 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1436 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1437 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1438 if (iascii || isdraw || isbinary || issocket) { 1439 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1440 } 1441 PetscFunctionReturn(0); 1442 } 1443 1444 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1445 { 1446 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1447 PetscErrorCode ierr; 1448 Vec bb1 = 0; 1449 PetscBool hasop; 1450 1451 PetscFunctionBegin; 1452 if (flag == SOR_APPLY_UPPER) { 1453 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1454 PetscFunctionReturn(0); 1455 } 1456 1457 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1458 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1459 } 1460 1461 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1462 if (flag & SOR_ZERO_INITIAL_GUESS) { 1463 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1464 its--; 1465 } 1466 1467 while (its--) { 1468 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1469 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1470 1471 /* update rhs: bb1 = bb - B*x */ 1472 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1473 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1474 1475 /* local sweep */ 1476 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1477 } 1478 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1479 if (flag & SOR_ZERO_INITIAL_GUESS) { 1480 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1481 its--; 1482 } 1483 while (its--) { 1484 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1485 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1486 1487 /* update rhs: bb1 = bb - B*x */ 1488 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1489 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1490 1491 /* local sweep */ 1492 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1493 } 1494 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1495 if (flag & SOR_ZERO_INITIAL_GUESS) { 1496 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1497 its--; 1498 } 1499 while (its--) { 1500 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1501 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1502 1503 /* update rhs: bb1 = bb - B*x */ 1504 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1505 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1506 1507 /* local sweep */ 1508 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1509 } 1510 } else if (flag & SOR_EISENSTAT) { 1511 Vec xx1; 1512 1513 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1514 ierr = 
(*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1515 1516 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1517 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1518 if (!mat->diag) { 1519 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1520 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1521 } 1522 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1523 if (hasop) { 1524 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1525 } else { 1526 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1527 } 1528 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1529 1530 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1531 1532 /* local sweep */ 1533 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1534 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1535 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1536 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1537 1538 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1539 1540 matin->factorerrortype = mat->A->factorerrortype; 1541 PetscFunctionReturn(0); 1542 } 1543 1544 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1545 { 1546 Mat aA,aB,Aperm; 1547 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1548 PetscScalar *aa,*ba; 1549 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1550 PetscSF rowsf,sf; 1551 IS parcolp = NULL; 1552 PetscBool done; 1553 PetscErrorCode ierr; 1554 1555 PetscFunctionBegin; 1556 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1557 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1558 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1559 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1560 1561 /* Invert row permutation to find out where my rows should go */ 1562 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1563 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1564 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1565 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1566 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1567 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1568 1569 /* Invert column permutation to find out where my columns should go */ 1570 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1571 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1572 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1573 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1574 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1575 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1576 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1577 1578 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1579 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1580 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1581 1582 /* Find out where my gcols should go */ 1583 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1584 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1585 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1586 ierr = 
PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1587 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1588 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1589 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1590 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1591 1592 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1593 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1594 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1595 for (i=0; i<m; i++) { 1596 PetscInt row = rdest[i],rowner; 1597 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1598 for (j=ai[i]; j<ai[i+1]; j++) { 1599 PetscInt cowner,col = cdest[aj[j]]; 1600 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1601 if (rowner == cowner) dnnz[i]++; 1602 else onnz[i]++; 1603 } 1604 for (j=bi[i]; j<bi[i+1]; j++) { 1605 PetscInt cowner,col = gcdest[bj[j]]; 1606 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1607 if (rowner == cowner) dnnz[i]++; 1608 else onnz[i]++; 1609 } 1610 } 1611 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1612 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1613 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1614 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1615 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1616 1617 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1618 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1619 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1620 for (i=0; i<m; i++) { 1621 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1622 PetscInt j0,rowlen; 1623 rowlen = ai[i+1] - ai[i]; 1624 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1625 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1626 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1627 } 1628 rowlen = bi[i+1] - bi[i]; 1629 for (j0=j=0; j<rowlen; j0=j) { 1630 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1631 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1632 } 1633 } 1634 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1635 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1636 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1637 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1638 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1639 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1640 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1641 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1642 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1643 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1644 *B = Aperm; 1645 PetscFunctionReturn(0); 1646 } 1647 1648 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1649 { 1650 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1651 PetscErrorCode ierr; 1652 1653 PetscFunctionBegin; 1654 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1655 if (ghosts) *ghosts = aij->garray; 1656 PetscFunctionReturn(0); 1657 } 
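/*
   Usage sketch (illustrative only, not built as part of this file): MatGetGhosts() exposes the
   garray of the off-diagonal block, i.e. the global indices of the off-process columns that
   MatMult() needs. One plausible way a caller could pair it with VecCreateGhost() so that the
   local form of a vector covers exactly those columns is sketched below; the names A and x and
   the surrounding error handling are assumptions of this sketch, not code from this file.

     PetscInt        nghost,nlocal;
     const PetscInt *ghosts;
     Vec             x;

     ierr = MatGetGhosts(A,&nghost,&ghosts);CHKERRQ(ierr);
     ierr = MatGetLocalSize(A,NULL,&nlocal);CHKERRQ(ierr);
     ierr = VecCreateGhost(PetscObjectComm((PetscObject)A),nlocal,PETSC_DECIDE,nghost,ghosts,&x);CHKERRQ(ierr);
*/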
1658 1659 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1660 { 1661 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1662 Mat A = mat->A,B = mat->B; 1663 PetscErrorCode ierr; 1664 PetscReal isend[5],irecv[5]; 1665 1666 PetscFunctionBegin; 1667 info->block_size = 1.0; 1668 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1669 1670 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1671 isend[3] = info->memory; isend[4] = info->mallocs; 1672 1673 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1674 1675 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1676 isend[3] += info->memory; isend[4] += info->mallocs; 1677 if (flag == MAT_LOCAL) { 1678 info->nz_used = isend[0]; 1679 info->nz_allocated = isend[1]; 1680 info->nz_unneeded = isend[2]; 1681 info->memory = isend[3]; 1682 info->mallocs = isend[4]; 1683 } else if (flag == MAT_GLOBAL_MAX) { 1684 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1685 1686 info->nz_used = irecv[0]; 1687 info->nz_allocated = irecv[1]; 1688 info->nz_unneeded = irecv[2]; 1689 info->memory = irecv[3]; 1690 info->mallocs = irecv[4]; 1691 } else if (flag == MAT_GLOBAL_SUM) { 1692 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1693 1694 info->nz_used = irecv[0]; 1695 info->nz_allocated = irecv[1]; 1696 info->nz_unneeded = irecv[2]; 1697 info->memory = irecv[3]; 1698 info->mallocs = irecv[4]; 1699 } 1700 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1701 info->fill_ratio_needed = 0; 1702 info->factor_mallocs = 0; 1703 PetscFunctionReturn(0); 1704 } 1705 1706 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1707 { 1708 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1709 PetscErrorCode ierr; 1710 1711 PetscFunctionBegin; 1712 switch (op) { 1713 case MAT_NEW_NONZERO_LOCATIONS: 1714 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1715 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1716 case MAT_KEEP_NONZERO_PATTERN: 1717 case MAT_NEW_NONZERO_LOCATION_ERR: 1718 case MAT_USE_INODES: 1719 case MAT_IGNORE_ZERO_ENTRIES: 1720 MatCheckPreallocated(A,1); 1721 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1722 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1723 break; 1724 case MAT_ROW_ORIENTED: 1725 MatCheckPreallocated(A,1); 1726 a->roworiented = flg; 1727 1728 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1729 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1730 break; 1731 case MAT_NEW_DIAGONALS: 1732 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1733 break; 1734 case MAT_IGNORE_OFF_PROC_ENTRIES: 1735 a->donotstash = flg; 1736 break; 1737 case MAT_SPD: 1738 A->spd_set = PETSC_TRUE; 1739 A->spd = flg; 1740 if (flg) { 1741 A->symmetric = PETSC_TRUE; 1742 A->structurally_symmetric = PETSC_TRUE; 1743 A->symmetric_set = PETSC_TRUE; 1744 A->structurally_symmetric_set = PETSC_TRUE; 1745 } 1746 break; 1747 case MAT_SYMMETRIC: 1748 MatCheckPreallocated(A,1); 1749 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1750 break; 1751 case MAT_STRUCTURALLY_SYMMETRIC: 1752 MatCheckPreallocated(A,1); 1753 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1754 break; 1755 case MAT_HERMITIAN: 1756 MatCheckPreallocated(A,1); 1757 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1758 break; 1759 case MAT_SYMMETRY_ETERNAL: 1760 MatCheckPreallocated(A,1); 1761 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1762 break; 1763 case MAT_SUBMAT_SINGLEIS: 
1764 A->submat_singleis = flg; 1765 break; 1766 case MAT_STRUCTURE_ONLY: 1767 /* The option is handled directly by MatSetOption() */ 1768 break; 1769 default: 1770 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1771 } 1772 PetscFunctionReturn(0); 1773 } 1774 1775 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1776 { 1777 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1778 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1779 PetscErrorCode ierr; 1780 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1781 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1782 PetscInt *cmap,*idx_p; 1783 1784 PetscFunctionBegin; 1785 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1786 mat->getrowactive = PETSC_TRUE; 1787 1788 if (!mat->rowvalues && (idx || v)) { 1789 /* 1790 allocate enough space to hold information from the longest row. 1791 */ 1792 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1793 PetscInt max = 1,tmp; 1794 for (i=0; i<matin->rmap->n; i++) { 1795 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1796 if (max < tmp) max = tmp; 1797 } 1798 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1799 } 1800 1801 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1802 lrow = row - rstart; 1803 1804 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1805 if (!v) {pvA = 0; pvB = 0;} 1806 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1807 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1808 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1809 nztot = nzA + nzB; 1810 1811 cmap = mat->garray; 1812 if (v || idx) { 1813 if (nztot) { 1814 /* Sort by increasing column numbers, assuming A and B already sorted */ 1815 PetscInt imark = -1; 1816 if (v) { 1817 *v = v_p = mat->rowvalues; 1818 for (i=0; i<nzB; i++) { 1819 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1820 else break; 1821 } 1822 imark = i; 1823 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1824 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1825 } 1826 if (idx) { 1827 *idx = idx_p = mat->rowindices; 1828 if (imark > -1) { 1829 for (i=0; i<imark; i++) { 1830 idx_p[i] = cmap[cworkB[i]]; 1831 } 1832 } else { 1833 for (i=0; i<nzB; i++) { 1834 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1835 else break; 1836 } 1837 imark = i; 1838 } 1839 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1840 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1841 } 1842 } else { 1843 if (idx) *idx = 0; 1844 if (v) *v = 0; 1845 } 1846 } 1847 *nz = nztot; 1848 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1849 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1850 PetscFunctionReturn(0); 1851 } 1852 1853 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1854 { 1855 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1856 1857 PetscFunctionBegin; 1858 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1859 aij->getrowactive = PETSC_FALSE; 1860 PetscFunctionReturn(0); 1861 } 1862 1863 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1864 { 1865 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1866 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = 
(Mat_SeqAIJ*)aij->B->data; 1867 PetscErrorCode ierr; 1868 PetscInt i,j,cstart = mat->cmap->rstart; 1869 PetscReal sum = 0.0; 1870 MatScalar *v; 1871 1872 PetscFunctionBegin; 1873 if (aij->size == 1) { 1874 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1875 } else { 1876 if (type == NORM_FROBENIUS) { 1877 v = amat->a; 1878 for (i=0; i<amat->nz; i++) { 1879 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1880 } 1881 v = bmat->a; 1882 for (i=0; i<bmat->nz; i++) { 1883 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1884 } 1885 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1886 *norm = PetscSqrtReal(*norm); 1887 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1888 } else if (type == NORM_1) { /* max column norm */ 1889 PetscReal *tmp,*tmp2; 1890 PetscInt *jj,*garray = aij->garray; 1891 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1892 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1893 *norm = 0.0; 1894 v = amat->a; jj = amat->j; 1895 for (j=0; j<amat->nz; j++) { 1896 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1897 } 1898 v = bmat->a; jj = bmat->j; 1899 for (j=0; j<bmat->nz; j++) { 1900 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1901 } 1902 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1903 for (j=0; j<mat->cmap->N; j++) { 1904 if (tmp2[j] > *norm) *norm = tmp2[j]; 1905 } 1906 ierr = PetscFree(tmp);CHKERRQ(ierr); 1907 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1908 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1909 } else if (type == NORM_INFINITY) { /* max row norm */ 1910 PetscReal ntemp = 0.0; 1911 for (j=0; j<aij->A->rmap->n; j++) { 1912 v = amat->a + amat->i[j]; 1913 sum = 0.0; 1914 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1915 sum += PetscAbsScalar(*v); v++; 1916 } 1917 v = bmat->a + bmat->i[j]; 1918 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1919 sum += PetscAbsScalar(*v); v++; 1920 } 1921 if (sum > ntemp) ntemp = sum; 1922 } 1923 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1924 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1925 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1926 } 1927 PetscFunctionReturn(0); 1928 } 1929 1930 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1931 { 1932 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1933 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1934 PetscErrorCode ierr; 1935 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1936 PetscInt cstart = A->cmap->rstart,ncol; 1937 Mat B; 1938 MatScalar *array; 1939 1940 PetscFunctionBegin; 1941 if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1942 1943 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1944 ai = Aloc->i; aj = Aloc->j; 1945 bi = Bloc->i; bj = Bloc->j; 1946 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1947 PetscInt *d_nnz,*g_nnz,*o_nnz; 1948 PetscSFNode *oloc; 1949 PETSC_UNUSED PetscSF sf; 1950 1951 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1952 /* compute d_nnz for preallocation */ 1953 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1954 for (i=0; i<ai[ma]; i++) { 1955 d_nnz[aj[i]]++; 1956 aj[i] += cstart; /* global col index to be used by 
MatSetValues() */ 1957 } 1958 /* compute local off-diagonal contributions */ 1959 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1960 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1961 /* map those to global */ 1962 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1963 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1964 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1965 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1966 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1967 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1968 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1969 1970 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1971 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1972 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1973 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1974 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1975 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1976 } else { 1977 B = *matout; 1978 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1979 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1980 } 1981 1982 /* copy over the A part */ 1983 array = Aloc->a; 1984 row = A->rmap->rstart; 1985 for (i=0; i<ma; i++) { 1986 ncol = ai[i+1]-ai[i]; 1987 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1988 row++; 1989 array += ncol; aj += ncol; 1990 } 1991 aj = Aloc->j; 1992 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1993 1994 /* copy over the B part */ 1995 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 1996 array = Bloc->a; 1997 row = A->rmap->rstart; 1998 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1999 cols_tmp = cols; 2000 for (i=0; i<mb; i++) { 2001 ncol = bi[i+1]-bi[i]; 2002 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2003 row++; 2004 array += ncol; cols_tmp += ncol; 2005 } 2006 ierr = PetscFree(cols);CHKERRQ(ierr); 2007 2008 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2009 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2010 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2011 *matout = B; 2012 } else { 2013 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2014 } 2015 PetscFunctionReturn(0); 2016 } 2017 2018 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2019 { 2020 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2021 Mat a = aij->A,b = aij->B; 2022 PetscErrorCode ierr; 2023 PetscInt s1,s2,s3; 2024 2025 PetscFunctionBegin; 2026 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2027 if (rr) { 2028 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2029 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2030 /* Overlap communication with computation. 
*/ 2031 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2032 } 2033 if (ll) { 2034 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2035 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2036 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2037 } 2038 /* scale the diagonal block */ 2039 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2040 2041 if (rr) { 2042 /* Do a scatter end and then right scale the off-diagonal block */ 2043 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2044 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2045 } 2046 PetscFunctionReturn(0); 2047 } 2048 2049 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2050 { 2051 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2052 PetscErrorCode ierr; 2053 2054 PetscFunctionBegin; 2055 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2056 PetscFunctionReturn(0); 2057 } 2058 2059 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2060 { 2061 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2062 Mat a,b,c,d; 2063 PetscBool flg; 2064 PetscErrorCode ierr; 2065 2066 PetscFunctionBegin; 2067 a = matA->A; b = matA->B; 2068 c = matB->A; d = matB->B; 2069 2070 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2071 if (flg) { 2072 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2073 } 2074 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2075 PetscFunctionReturn(0); 2076 } 2077 2078 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2079 { 2080 PetscErrorCode ierr; 2081 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2082 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2083 2084 PetscFunctionBegin; 2085 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2086 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2087 /* because of the column compression in the off-processor part of the matrix a->B, 2088 the number of columns in a->B and b->B may be different, hence we cannot call 2089 the MatCopy() directly on the two parts. If need be, we can provide a more 2090 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2091 then copying the submatrices */ 2092 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2093 } else { 2094 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2095 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2096 } 2097 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2098 PetscFunctionReturn(0); 2099 } 2100 2101 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2102 { 2103 PetscErrorCode ierr; 2104 2105 PetscFunctionBegin; 2106 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2107 PetscFunctionReturn(0); 2108 } 2109 2110 /* 2111 Computes the number of nonzeros per row needed for preallocation when X and Y 2112 have different nonzero structure. 
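   For each row the loop below merges the two sorted column lists of X and Y (mapped to global
   indices through xltog and yltog) and counts the size of their union; for example, a row with
   X columns {0,3,7} and Y columns {3,5} gives nnz[i] = 4.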
2113 */ 2114 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2115 { 2116 PetscInt i,j,k,nzx,nzy; 2117 2118 PetscFunctionBegin; 2119 /* Set the number of nonzeros in the new matrix */ 2120 for (i=0; i<m; i++) { 2121 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2122 nzx = xi[i+1] - xi[i]; 2123 nzy = yi[i+1] - yi[i]; 2124 nnz[i] = 0; 2125 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2126 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2127 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2128 nnz[i]++; 2129 } 2130 for (; k<nzy; k++) nnz[i]++; 2131 } 2132 PetscFunctionReturn(0); 2133 } 2134 2135 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2136 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2137 { 2138 PetscErrorCode ierr; 2139 PetscInt m = Y->rmap->N; 2140 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2141 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2142 2143 PetscFunctionBegin; 2144 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2145 PetscFunctionReturn(0); 2146 } 2147 2148 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2149 { 2150 PetscErrorCode ierr; 2151 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2152 PetscBLASInt bnz,one=1; 2153 Mat_SeqAIJ *x,*y; 2154 2155 PetscFunctionBegin; 2156 if (str == SAME_NONZERO_PATTERN) { 2157 PetscScalar alpha = a; 2158 x = (Mat_SeqAIJ*)xx->A->data; 2159 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2160 y = (Mat_SeqAIJ*)yy->A->data; 2161 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2162 x = (Mat_SeqAIJ*)xx->B->data; 2163 y = (Mat_SeqAIJ*)yy->B->data; 2164 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2165 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2166 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2167 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2168 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2169 } else { 2170 Mat B; 2171 PetscInt *nnz_d,*nnz_o; 2172 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2173 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2174 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2175 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2176 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2177 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2178 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2179 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2180 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2181 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2182 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2183 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2184 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2185 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2186 } 2187 PetscFunctionReturn(0); 2188 } 2189 2190 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2191 2192 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2193 { 2194 #if defined(PETSC_USE_COMPLEX) 2195 PetscErrorCode ierr; 2196 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ*)mat->data; 2197 2198 PetscFunctionBegin; 2199 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2200 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2201 #else 2202 PetscFunctionBegin; 2203 #endif 2204 PetscFunctionReturn(0); 2205 } 2206 2207 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2208 { 2209 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2210 PetscErrorCode ierr; 2211 2212 PetscFunctionBegin; 2213 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2214 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2215 PetscFunctionReturn(0); 2216 } 2217 2218 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2219 { 2220 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2221 PetscErrorCode ierr; 2222 2223 PetscFunctionBegin; 2224 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2225 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2226 PetscFunctionReturn(0); 2227 } 2228 2229 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2230 { 2231 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2232 PetscErrorCode ierr; 2233 PetscInt i,*idxb = 0; 2234 PetscScalar *va,*vb; 2235 Vec vtmp; 2236 2237 PetscFunctionBegin; 2238 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2239 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2240 if (idx) { 2241 for (i=0; i<A->rmap->n; i++) { 2242 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2243 } 2244 } 2245 2246 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2247 if (idx) { 2248 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2249 } 2250 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2251 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2252 2253 for (i=0; i<A->rmap->n; i++) { 2254 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2255 va[i] = vb[i]; 2256 if (idx) idx[i] = a->garray[idxb[i]]; 2257 } 2258 } 2259 2260 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2261 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2262 ierr = PetscFree(idxb);CHKERRQ(ierr); 2263 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2264 PetscFunctionReturn(0); 2265 } 2266 2267 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2268 { 2269 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2270 PetscErrorCode ierr; 2271 PetscInt i,*idxb = 0; 2272 PetscScalar *va,*vb; 2273 Vec vtmp; 2274 2275 PetscFunctionBegin; 2276 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2277 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2278 if (idx) { 2279 for (i=0; i<A->rmap->n; i++) { 2280 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2281 } 2282 } 2283 2284 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2285 if (idx) { 2286 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2287 } 2288 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2289 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2290 2291 for (i=0; i<A->rmap->n; i++) { 2292 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2293 va[i] = vb[i]; 2294 if (idx) idx[i] = a->garray[idxb[i]]; 2295 } 2296 } 2297 2298 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2299 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2300 ierr = PetscFree(idxb);CHKERRQ(ierr); 2301 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2302 PetscFunctionReturn(0); 2303 } 2304 2305 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2306 { 2307 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2308 PetscInt n = A->rmap->n; 2309 PetscInt cstart = A->cmap->rstart; 2310 PetscInt *cmap = mat->garray; 2311 PetscInt *diagIdx, *offdiagIdx; 2312 Vec diagV, offdiagV; 2313 PetscScalar *a, *diagA, *offdiagA; 2314 PetscInt r; 2315 PetscErrorCode ierr; 2316 2317 PetscFunctionBegin; 2318
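/*
   Row minima are computed separately for the diagonal block (mat->A) and the off-diagonal
   block (mat->B) using sequential work vectors, then merged below: for each local row the
   entry of smaller magnitude is kept and its column index is translated to a global index
   (cstart plus the local column for A, a garray[] lookup for B).
*/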
ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2319 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2320 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2321 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2322 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2323 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2324 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2325 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2326 for (r = 0; r < n; ++r) { 2327 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2328 a[r] = diagA[r]; 2329 idx[r] = cstart + diagIdx[r]; 2330 } else { 2331 a[r] = offdiagA[r]; 2332 idx[r] = cmap[offdiagIdx[r]]; 2333 } 2334 } 2335 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2336 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2337 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2338 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2339 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2340 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2341 PetscFunctionReturn(0); 2342 } 2343 2344 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2345 { 2346 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2347 PetscInt n = A->rmap->n; 2348 PetscInt cstart = A->cmap->rstart; 2349 PetscInt *cmap = mat->garray; 2350 PetscInt *diagIdx, *offdiagIdx; 2351 Vec diagV, offdiagV; 2352 PetscScalar *a, *diagA, *offdiagA; 2353 PetscInt r; 2354 PetscErrorCode ierr; 2355 2356 PetscFunctionBegin; 2357 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2358 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2359 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2360 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2361 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2362 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2363 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2364 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2365 for (r = 0; r < n; ++r) { 2366 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2367 a[r] = diagA[r]; 2368 idx[r] = cstart + diagIdx[r]; 2369 } else { 2370 a[r] = offdiagA[r]; 2371 idx[r] = cmap[offdiagIdx[r]]; 2372 } 2373 } 2374 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2375 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2376 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2377 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2378 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2379 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2380 PetscFunctionReturn(0); 2381 } 2382 2383 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2384 { 2385 PetscErrorCode ierr; 2386 Mat *dummy; 2387 2388 PetscFunctionBegin; 2389 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2390 *newmat = *dummy; 2391 ierr = PetscFree(dummy);CHKERRQ(ierr); 2392 PetscFunctionReturn(0); 2393 } 2394 2395 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2396 { 2397 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2398 PetscErrorCode ierr; 2399 2400 PetscFunctionBegin; 2401 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2402 A->factorerrortype = a->A->factorerrortype; 2403 PetscFunctionReturn(0); 2404 } 2405 2406 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2407 { 2408 PetscErrorCode ierr; 2409 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2410 2411 PetscFunctionBegin; 2412
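/* Fill the diagonal (aij->A) and off-diagonal (aij->B) sequential blocks with random values
   from rctx, then run the assembly phase so the parallel matrix ends up in an assembled state. */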
ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2413 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2414 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2415 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2416 PetscFunctionReturn(0); 2417 } 2418 2419 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2420 { 2421 PetscFunctionBegin; 2422 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2423 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2424 PetscFunctionReturn(0); 2425 } 2426 2427 /*@ 2428 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2429 2430 Collective on Mat 2431 2432 Input Parameters: 2433 + A - the matrix 2434 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2435 2436 Level: advanced 2437 2438 @*/ 2439 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2440 { 2441 PetscErrorCode ierr; 2442 2443 PetscFunctionBegin; 2444 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2445 PetscFunctionReturn(0); 2446 } 2447 2448 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2449 { 2450 PetscErrorCode ierr; 2451 PetscBool sc = PETSC_FALSE,flg; 2452 2453 PetscFunctionBegin; 2454 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2455 ierr = PetscObjectOptionsBegin((PetscObject)A); 2456 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2457 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2458 if (flg) { 2459 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2460 } 2461 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2462 PetscFunctionReturn(0); 2463 } 2464 2465 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2466 { 2467 PetscErrorCode ierr; 2468 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2469 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2470 2471 PetscFunctionBegin; 2472 if (!Y->preallocated) { 2473 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2474 } else if (!aij->nz) { 2475 PetscInt nonew = aij->nonew; 2476 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2477 aij->nonew = nonew; 2478 } 2479 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2480 PetscFunctionReturn(0); 2481 } 2482 2483 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2484 { 2485 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2486 PetscErrorCode ierr; 2487 2488 PetscFunctionBegin; 2489 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2490 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2491 if (d) { 2492 PetscInt rstart; 2493 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2494 *d += rstart; 2495 2496 } 2497 PetscFunctionReturn(0); 2498 } 2499 2500 2501 /* -------------------------------------------------------------------*/ 2502 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2503 MatGetRow_MPIAIJ, 2504 MatRestoreRow_MPIAIJ, 2505 MatMult_MPIAIJ, 2506 /* 4*/ MatMultAdd_MPIAIJ, 2507 MatMultTranspose_MPIAIJ, 2508 MatMultTransposeAdd_MPIAIJ, 2509 0, 2510 0, 2511 0, 2512 /*10*/ 0, 2513 0, 2514 0, 2515 MatSOR_MPIAIJ, 2516 MatTranspose_MPIAIJ, 2517 /*15*/ MatGetInfo_MPIAIJ, 2518 MatEqual_MPIAIJ, 2519 
MatGetDiagonal_MPIAIJ, 2520 MatDiagonalScale_MPIAIJ, 2521 MatNorm_MPIAIJ, 2522 /*20*/ MatAssemblyBegin_MPIAIJ, 2523 MatAssemblyEnd_MPIAIJ, 2524 MatSetOption_MPIAIJ, 2525 MatZeroEntries_MPIAIJ, 2526 /*24*/ MatZeroRows_MPIAIJ, 2527 0, 2528 0, 2529 0, 2530 0, 2531 /*29*/ MatSetUp_MPIAIJ, 2532 0, 2533 0, 2534 MatGetDiagonalBlock_MPIAIJ, 2535 0, 2536 /*34*/ MatDuplicate_MPIAIJ, 2537 0, 2538 0, 2539 0, 2540 0, 2541 /*39*/ MatAXPY_MPIAIJ, 2542 MatCreateSubMatrices_MPIAIJ, 2543 MatIncreaseOverlap_MPIAIJ, 2544 MatGetValues_MPIAIJ, 2545 MatCopy_MPIAIJ, 2546 /*44*/ MatGetRowMax_MPIAIJ, 2547 MatScale_MPIAIJ, 2548 MatShift_MPIAIJ, 2549 MatDiagonalSet_MPIAIJ, 2550 MatZeroRowsColumns_MPIAIJ, 2551 /*49*/ MatSetRandom_MPIAIJ, 2552 0, 2553 0, 2554 0, 2555 0, 2556 /*54*/ MatFDColoringCreate_MPIXAIJ, 2557 0, 2558 MatSetUnfactored_MPIAIJ, 2559 MatPermute_MPIAIJ, 2560 0, 2561 /*59*/ MatCreateSubMatrix_MPIAIJ, 2562 MatDestroy_MPIAIJ, 2563 MatView_MPIAIJ, 2564 0, 2565 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2566 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2567 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2568 0, 2569 0, 2570 0, 2571 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2572 MatGetRowMinAbs_MPIAIJ, 2573 0, 2574 0, 2575 0, 2576 0, 2577 /*75*/ MatFDColoringApply_AIJ, 2578 MatSetFromOptions_MPIAIJ, 2579 0, 2580 0, 2581 MatFindZeroDiagonals_MPIAIJ, 2582 /*80*/ 0, 2583 0, 2584 0, 2585 /*83*/ MatLoad_MPIAIJ, 2586 MatIsSymmetric_MPIAIJ, 2587 0, 2588 0, 2589 0, 2590 0, 2591 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2592 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2593 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2594 MatPtAP_MPIAIJ_MPIAIJ, 2595 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2596 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2597 0, 2598 0, 2599 0, 2600 0, 2601 /*99*/ 0, 2602 0, 2603 0, 2604 MatConjugate_MPIAIJ, 2605 0, 2606 /*104*/MatSetValuesRow_MPIAIJ, 2607 MatRealPart_MPIAIJ, 2608 MatImaginaryPart_MPIAIJ, 2609 0, 2610 0, 2611 /*109*/0, 2612 0, 2613 MatGetRowMin_MPIAIJ, 2614 0, 2615 MatMissingDiagonal_MPIAIJ, 2616 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2617 0, 2618 MatGetGhosts_MPIAIJ, 2619 0, 2620 0, 2621 /*119*/0, 2622 0, 2623 0, 2624 0, 2625 MatGetMultiProcBlock_MPIAIJ, 2626 /*124*/MatFindNonzeroRows_MPIAIJ, 2627 MatGetColumnNorms_MPIAIJ, 2628 MatInvertBlockDiagonal_MPIAIJ, 2629 0, 2630 MatCreateSubMatricesMPI_MPIAIJ, 2631 /*129*/0, 2632 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2633 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2634 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2635 0, 2636 /*134*/0, 2637 0, 2638 MatRARt_MPIAIJ_MPIAIJ, 2639 0, 2640 0, 2641 /*139*/MatSetBlockSizes_MPIAIJ, 2642 0, 2643 0, 2644 MatFDColoringSetUp_MPIXAIJ, 2645 MatFindOffBlockDiagonalEntries_MPIAIJ, 2646 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2647 }; 2648 2649 /* ----------------------------------------------------------------------------------------*/ 2650 2651 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2652 { 2653 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2654 PetscErrorCode ierr; 2655 2656 PetscFunctionBegin; 2657 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2658 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2659 PetscFunctionReturn(0); 2660 } 2661 2662 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2663 { 2664 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2665 PetscErrorCode ierr; 2666 2667 PetscFunctionBegin; 2668 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2669 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2670 PetscFunctionReturn(0); 2671 } 2672 2673 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt 
o_nnz[]) 2674 { 2675 Mat_MPIAIJ *b; 2676 PetscErrorCode ierr; 2677 2678 PetscFunctionBegin; 2679 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2680 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2681 b = (Mat_MPIAIJ*)B->data; 2682 2683 #if defined(PETSC_USE_CTABLE) 2684 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2685 #else 2686 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2687 #endif 2688 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2689 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2690 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2691 2692 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2693 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2694 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2695 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2696 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2697 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2698 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2699 2700 if (!B->preallocated) { 2701 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2702 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2703 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2704 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2705 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2706 } 2707 2708 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2709 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2710 B->preallocated = PETSC_TRUE; 2711 B->was_assembled = PETSC_FALSE; 2712 B->assembled = PETSC_FALSE;; 2713 PetscFunctionReturn(0); 2714 } 2715 2716 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2717 { 2718 Mat_MPIAIJ *b; 2719 PetscErrorCode ierr; 2720 2721 PetscFunctionBegin; 2722 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2723 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2724 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2725 b = (Mat_MPIAIJ*)B->data; 2726 2727 #if defined(PETSC_USE_CTABLE) 2728 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2729 #else 2730 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2731 #endif 2732 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2733 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2734 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2735 2736 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2737 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2738 B->preallocated = PETSC_TRUE; 2739 B->was_assembled = PETSC_FALSE; 2740 B->assembled = PETSC_FALSE; 2741 PetscFunctionReturn(0); 2742 } 2743 2744 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2745 { 2746 Mat mat; 2747 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2748 PetscErrorCode ierr; 2749 2750 PetscFunctionBegin; 2751 *newmat = 0; 2752 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2753 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2754 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2755 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2756 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2757 a = (Mat_MPIAIJ*)mat->data; 2758 2759 mat->factortype = matin->factortype; 2760 mat->assembled = PETSC_TRUE; 2761 mat->insertmode = NOT_SET_VALUES; 2762 mat->preallocated = PETSC_TRUE; 2763 2764 a->size = oldmat->size; 2765 a->rank = oldmat->rank; 2766 a->donotstash 
= oldmat->donotstash; 2767 a->roworiented = oldmat->roworiented; 2768 a->rowindices = 0; 2769 a->rowvalues = 0; 2770 a->getrowactive = PETSC_FALSE; 2771 2772 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2773 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2774 2775 if (oldmat->colmap) { 2776 #if defined(PETSC_USE_CTABLE) 2777 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2778 #else 2779 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2780 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2781 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2782 #endif 2783 } else a->colmap = 0; 2784 if (oldmat->garray) { 2785 PetscInt len; 2786 len = oldmat->B->cmap->n; 2787 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2788 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2789 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2790 } else a->garray = 0; 2791 2792 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2793 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2794 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2795 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2796 2797 if (oldmat->Mvctx_mpi1) { 2798 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2799 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2800 } 2801 2802 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2803 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2804 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2805 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2806 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2807 *newmat = mat; 2808 PetscFunctionReturn(0); 2809 } 2810 2811 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2812 { 2813 PetscScalar *vals,*svals; 2814 MPI_Comm comm; 2815 PetscErrorCode ierr; 2816 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2817 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2818 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2819 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2820 PetscInt cend,cstart,n,*rowners; 2821 int fd; 2822 PetscInt bs = newMat->rmap->bs; 2823 2824 PetscFunctionBegin; 2825 /* force binary viewer to load .info file if it has not yet done so */ 2826 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2827 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2828 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2829 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2830 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2831 if (!rank) { 2832 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2833 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2834 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2835 } 2836 2837 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2838 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the 
matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2839 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2840 if (bs < 0) bs = 1; 2841 2842 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2843 M = header[1]; N = header[2]; 2844 2845 /* If global sizes are set, check if they are consistent with that given in the file */ 2846 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2847 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2848 2849 /* determine ownership of all (block) rows */ 2850 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2851 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2852 else m = newMat->rmap->n; /* Set by user */ 2853 2854 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2855 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2856 2857 /* First process needs enough room for process with most rows */ 2858 if (!rank) { 2859 mmax = rowners[1]; 2860 for (i=2; i<=size; i++) { 2861 mmax = PetscMax(mmax, rowners[i]); 2862 } 2863 } else mmax = -1; /* unused, but compilers complain */ 2864 2865 rowners[0] = 0; 2866 for (i=2; i<=size; i++) { 2867 rowners[i] += rowners[i-1]; 2868 } 2869 rstart = rowners[rank]; 2870 rend = rowners[rank+1]; 2871 2872 /* distribute row lengths to all processors */ 2873 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2874 if (!rank) { 2875 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2876 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2877 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2878 for (j=0; j<m; j++) { 2879 procsnz[0] += ourlens[j]; 2880 } 2881 for (i=1; i<size; i++) { 2882 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2883 /* calculate the number of nonzeros on each processor */ 2884 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2885 procsnz[i] += rowlengths[j]; 2886 } 2887 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2888 } 2889 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2890 } else { 2891 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2892 } 2893 2894 if (!rank) { 2895 /* determine max buffer needed and allocate it */ 2896 maxnz = 0; 2897 for (i=0; i<size; i++) { 2898 maxnz = PetscMax(maxnz,procsnz[i]); 2899 } 2900 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2901 2902 /* read in my part of the matrix column indices */ 2903 nz = procsnz[0]; 2904 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2905 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2906 2907 /* read in every one elses and ship off */ 2908 for (i=1; i<size; i++) { 2909 nz = procsnz[i]; 2910 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2911 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2912 } 2913 ierr = PetscFree(cols);CHKERRQ(ierr); 2914 } else { 2915 /* determine buffer space needed for message */ 2916 nz = 0; 2917 for (i=0; i<m; i++) { 2918 nz += ourlens[i]; 2919 } 2920 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2921 2922 /* receive message of column indices*/ 2923 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2924 } 2925 2926 /* 
determine column ownership if matrix is not square */ 2927 if (N != M) { 2928 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 2929 else n = newMat->cmap->n; 2930 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2931 cstart = cend - n; 2932 } else { 2933 cstart = rstart; 2934 cend = rend; 2935 n = cend - cstart; 2936 } 2937 2938 /* loop over local rows, determining number of off diagonal entries */ 2939 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2940 jj = 0; 2941 for (i=0; i<m; i++) { 2942 for (j=0; j<ourlens[i]; j++) { 2943 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2944 jj++; 2945 } 2946 } 2947 2948 for (i=0; i<m; i++) { 2949 ourlens[i] -= offlens[i]; 2950 } 2951 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 2952 2953 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 2954 2955 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 2956 2957 for (i=0; i<m; i++) { 2958 ourlens[i] += offlens[i]; 2959 } 2960 2961 if (!rank) { 2962 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 2963 2964 /* read in my part of the matrix numerical values */ 2965 nz = procsnz[0]; 2966 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2967 2968 /* insert into matrix */ 2969 jj = rstart; 2970 smycols = mycols; 2971 svals = vals; 2972 for (i=0; i<m; i++) { 2973 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2974 smycols += ourlens[i]; 2975 svals += ourlens[i]; 2976 jj++; 2977 } 2978 2979 /* read in other processors and ship out */ 2980 for (i=1; i<size; i++) { 2981 nz = procsnz[i]; 2982 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2983 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 2984 } 2985 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2986 } else { 2987 /* receive numeric values */ 2988 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 2989 2990 /* receive message of values*/ 2991 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 2992 2993 /* insert into matrix */ 2994 jj = rstart; 2995 smycols = mycols; 2996 svals = vals; 2997 for (i=0; i<m; i++) { 2998 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2999 smycols += ourlens[i]; 3000 svals += ourlens[i]; 3001 jj++; 3002 } 3003 } 3004 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3005 ierr = PetscFree(vals);CHKERRQ(ierr); 3006 ierr = PetscFree(mycols);CHKERRQ(ierr); 3007 ierr = PetscFree(rowners);CHKERRQ(ierr); 3008 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3009 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3010 PetscFunctionReturn(0); 3011 } 3012 3013 /* Not scalable because of ISAllGather() unless getting all columns. 
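   ISAllGather() concatenates every process's portion of iscol onto each process, so memory
   and communication grow with the global number of selected columns; the stride check below
   recognizes the common all-columns case and builds an identity stride IS instead.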
*/ 3014 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3015 { 3016 PetscErrorCode ierr; 3017 IS iscol_local; 3018 PetscBool isstride; 3019 PetscMPIInt lisstride=0,gisstride; 3020 3021 PetscFunctionBegin; 3022 /* check if we are grabbing all columns*/ 3023 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3024 3025 if (isstride) { 3026 PetscInt start,len,mstart,mlen; 3027 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3028 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3029 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3030 if (mstart == start && mlen-mstart == len) lisstride = 1; 3031 } 3032 3033 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3034 if (gisstride) { 3035 PetscInt N; 3036 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3037 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3038 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3039 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3040 } else { 3041 PetscInt cbs; 3042 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3043 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3044 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3045 } 3046 3047 *isseq = iscol_local; 3048 PetscFunctionReturn(0); 3049 } 3050 3051 /* 3052 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3053 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3054 3055 Input Parameters: 3056 mat - matrix 3057 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3058 i.e., mat->rstart <= isrow[i] < mat->rend 3059 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3060 i.e., mat->cstart <= iscol[i] < mat->cend 3061 Output Parameter: 3062 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3063 iscol_o - sequential column index set for retrieving mat->B 3064 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3065 */ 3066 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3067 { 3068 PetscErrorCode ierr; 3069 Vec x,cmap; 3070 const PetscInt *is_idx; 3071 PetscScalar *xarray,*cmaparray; 3072 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3073 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3074 Mat B=a->B; 3075 Vec lvec=a->lvec,lcmap; 3076 PetscInt i,cstart,cend,Bn=B->cmap->N; 3077 MPI_Comm comm; 3078 VecScatter Mvctx=a->Mvctx; 3079 3080 PetscFunctionBegin; 3081 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3082 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3083 3084 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3085 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3086 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3087 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3088 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3089 3090 /* Get start indices */ 3091 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3092 isstart -= ncols; 3093 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3094 3095 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3096 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3097 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3098 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3099 for (i=0; i<ncols; i++) { 3100 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3101 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3102 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3103 } 3104 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3105 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3106 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3107 3108 /* Get iscol_d */ 3109 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3110 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3111 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3112 3113 /* Get isrow_d */ 3114 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3115 rstart = mat->rmap->rstart; 3116 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3117 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3118 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3119 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3120 3121 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3122 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3123 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3124 3125 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3126 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3127 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3128 3129 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3130 3131 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3132 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3133 3134 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3135 /* off-process column indices */ 3136 count = 0; 3137 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3138 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3139 3140 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3141 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3142 for (i=0; i<Bn; i++) { 3143 if (PetscRealPart(xarray[i]) > -1.0) { 3144 idx[count] = i; /* local column index in off-diagonal part B */ 3145 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3146 count++; 3147 } 3148 } 3149 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3150 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3151 3152 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3153 /* cannot ensure iscol_o has same blocksize as iscol! 
*/ 3154 3155 ierr = PetscFree(idx);CHKERRQ(ierr); 3156 *garray = cmap1; 3157 3158 ierr = VecDestroy(&x);CHKERRQ(ierr); 3159 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3160 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3161 PetscFunctionReturn(0); 3162 } 3163 3164 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3165 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3166 { 3167 PetscErrorCode ierr; 3168 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3169 Mat M = NULL; 3170 MPI_Comm comm; 3171 IS iscol_d,isrow_d,iscol_o; 3172 Mat Asub = NULL,Bsub = NULL; 3173 PetscInt n; 3174 3175 PetscFunctionBegin; 3176 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3177 3178 if (call == MAT_REUSE_MATRIX) { 3179 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3180 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3181 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3182 3183 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3184 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3185 3186 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3187 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3188 3189 /* Update diagonal and off-diagonal portions of submat */ 3190 asub = (Mat_MPIAIJ*)(*submat)->data; 3191 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3192 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3193 if (n) { 3194 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3195 } 3196 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3197 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3198 3199 } else { /* call == MAT_INITIAL_MATRIX) */ 3200 const PetscInt *garray; 3201 PetscInt BsubN; 3202 3203 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3204 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3205 3206 /* Create local submatrices Asub and Bsub */ 3207 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3208 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3209 3210 /* Create submatrix M */ 3211 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3212 3213 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3214 asub = (Mat_MPIAIJ*)M->data; 3215 3216 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3217 n = asub->B->cmap->N; 3218 if (BsubN > n) { 3219 /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */ 3220 const PetscInt *idx; 3221 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3222 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3223 3224 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3225 j = 0; 3226 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3227 for (i=0; i<n; i++) { 3228 if (j >= BsubN) break; 3229 while (subgarray[i] > garray[j]) j++; 3230 3231 if (subgarray[i] == garray[j]) { 3232 idx_new[i] = idx[j++]; 3233 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3234 } 3235 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3236 3237 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3238 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3239 3240 } else if (BsubN < n) { 3241 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3242 } 3243 3244 ierr = PetscFree(garray);CHKERRQ(ierr); 3245 *submat = M; 3246 3247 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3248 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3249 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3250 3251 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3252 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3253 3254 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3255 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3256 } 3257 PetscFunctionReturn(0); 3258 } 3259 3260 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3261 { 3262 PetscErrorCode ierr; 3263 IS iscol_local=NULL,isrow_d; 3264 PetscInt csize; 3265 PetscInt n,i,j,start,end; 3266 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3267 MPI_Comm comm; 3268 3269 PetscFunctionBegin; 3270 /* If isrow has same processor distribution as mat, 3271 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3272 if (call == MAT_REUSE_MATRIX) { 3273 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3274 if (isrow_d) { 3275 sameRowDist = PETSC_TRUE; 3276 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3277 } else { 3278 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3279 if (iscol_local) { 3280 sameRowDist = PETSC_TRUE; 3281 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3282 } 3283 } 3284 } else { 3285 /* Check if isrow has same processor distribution as mat */ 3286 sameDist[0] 
= PETSC_FALSE; 3287 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3288 if (!n) { 3289 sameDist[0] = PETSC_TRUE; 3290 } else { 3291 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3292 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3293 if (i >= start && j < end) { 3294 sameDist[0] = PETSC_TRUE; 3295 } 3296 } 3297 3298 /* Check if iscol has same processor distribution as mat */ 3299 sameDist[1] = PETSC_FALSE; 3300 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3301 if (!n) { 3302 sameDist[1] = PETSC_TRUE; 3303 } else { 3304 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3305 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3306 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3307 } 3308 3309 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3310 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3311 sameRowDist = tsameDist[0]; 3312 } 3313 3314 if (sameRowDist) { 3315 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3316 /* isrow and iscol have same processor distribution as mat */ 3317 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3318 PetscFunctionReturn(0); 3319 } else { /* sameRowDist */ 3320 /* isrow has same processor distribution as mat */ 3321 if (call == MAT_INITIAL_MATRIX) { 3322 PetscBool sorted; 3323 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3324 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3325 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3326 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3327 3328 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3329 if (sorted) { 3330 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3331 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3332 PetscFunctionReturn(0); 3333 } 3334 } else { /* call == MAT_REUSE_MATRIX */ 3335 IS iscol_sub; 3336 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3337 if (iscol_sub) { 3338 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3339 PetscFunctionReturn(0); 3340 } 3341 } 3342 } 3343 } 3344 3345 /* General case: iscol -> iscol_local which has global size of iscol */ 3346 if (call == MAT_REUSE_MATRIX) { 3347 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3348 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3349 } else { 3350 if (!iscol_local) { 3351 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3352 } 3353 } 3354 3355 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3356 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3357 3358 if (call == MAT_INITIAL_MATRIX) { 3359 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3360 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3361 } 3362 PetscFunctionReturn(0); 3363 } 3364 3365 /*@C 3366 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3367 and "off-diagonal" part of the matrix in CSR format. 3368 3369 Collective on MPI_Comm 3370 3371 Input Parameters: 3372 + comm - MPI communicator 3373 . 
A - "diagonal" portion of matrix 3374 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3375 - garray - global index of B columns 3376 3377 Output Parameter: 3378 . mat - the matrix, with input A as its local diagonal matrix 3379 Level: advanced 3380 3381 Notes: 3382 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3383 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3384 3385 .seealso: MatCreateMPIAIJWithSplitArrays() 3386 @*/ 3387 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3388 { 3389 PetscErrorCode ierr; 3390 Mat_MPIAIJ *maij; 3391 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3392 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3393 PetscScalar *oa=b->a; 3394 Mat Bnew; 3395 PetscInt m,n,N; 3396 3397 PetscFunctionBegin; 3398 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3399 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3400 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3401 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3402 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3403 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3404 3405 /* Get global columns of mat */ 3406 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3407 3408 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3409 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3410 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3411 maij = (Mat_MPIAIJ*)(*mat)->data; 3412 3413 (*mat)->preallocated = PETSC_TRUE; 3414 3415 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3416 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3417 3418 /* Set A as diagonal portion of *mat */ 3419 maij->A = A; 3420 3421 nz = oi[m]; 3422 for (i=0; i<nz; i++) { 3423 col = oj[i]; 3424 oj[i] = garray[col]; 3425 } 3426 3427 /* Set Bnew as off-diagonal portion of *mat */ 3428 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3429 bnew = (Mat_SeqAIJ*)Bnew->data; 3430 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3431 maij->B = Bnew; 3432 3433 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3434 3435 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3436 b->free_a = PETSC_FALSE; 3437 b->free_ij = PETSC_FALSE; 3438 ierr = MatDestroy(&B);CHKERRQ(ierr); 3439 3440 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3441 bnew->free_a = PETSC_TRUE; 3442 bnew->free_ij = PETSC_TRUE; 3443 3444 /* condense columns of maij->B */ 3445 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3446 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3447 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3448 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3449 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3450 PetscFunctionReturn(0); 3451 } 3452 3453 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3454 
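/*
    MatCreateSubMatrix_MPIAIJ_SameRowDist - extracts a submatrix when isrow has the same processor
    distribution as mat.

    iscol_local is the gathered sequential version of iscol; for MAT_INITIAL_MATRIX it must be
    sorted (duplicate indices are allowed), and for MAT_REUSE_MATRIX it may be passed as NULL.
    The sequential submatrix ("SubMatrix"), the sub-column index set ("SubIScol") and the column
    map ("Subcmap") are composed on *newmat so that a subsequent MAT_REUSE_MATRIX call can
    retrieve them.
*/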
3455 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3456 { 3457 PetscErrorCode ierr; 3458 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3459 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3460 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3461 Mat M,Msub,B=a->B; 3462 MatScalar *aa; 3463 Mat_SeqAIJ *aij; 3464 PetscInt *garray = a->garray,*colsub,Ncols; 3465 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3466 IS iscol_sub,iscmap; 3467 const PetscInt *is_idx,*cmap; 3468 PetscBool allcolumns=PETSC_FALSE; 3469 MPI_Comm comm; 3470 3471 PetscFunctionBegin; 3472 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3473 3474 if (call == MAT_REUSE_MATRIX) { 3475 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3476 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3477 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3478 3479 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3480 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3481 3482 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3483 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3484 3485 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3486 3487 } else { /* call == MAT_INITIAL_MATRIX) */ 3488 PetscBool flg; 3489 3490 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3491 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3492 3493 /* (1) iscol -> nonscalable iscol_local */ 3494 /* Check for special case: each processor gets entire matrix columns */ 3495 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3496 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3497 if (allcolumns) { 3498 iscol_sub = iscol_local; 3499 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3500 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3501 3502 } else { 3503 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3504 PetscInt *idx,*cmap1,k; 3505 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3506 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3507 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3508 count = 0; 3509 k = 0; 3510 for (i=0; i<Ncols; i++) { 3511 j = is_idx[i]; 3512 if (j >= cstart && j < cend) { 3513 /* diagonal part of mat */ 3514 idx[count] = j; 3515 cmap1[count++] = i; /* column index in submat */ 3516 } else if (Bn) { 3517 /* off-diagonal part of mat */ 3518 if (j == garray[k]) { 3519 idx[count] = j; 3520 cmap1[count++] = i; /* column index in submat */ 3521 } else if (j > garray[k]) { 3522 while (j > garray[k] && k < Bn-1) k++; 3523 if (j == garray[k]) { 3524 idx[count] = j; 3525 cmap1[count++] = i; /* column index in submat */ 3526 } 3527 } 3528 } 3529 } 3530 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3531 3532 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3533 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3534 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3535 3536 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3537 } 3538 3539 /* (3) Create sequential Msub */ 3540 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3541 } 3542 3543 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3544 aij = (Mat_SeqAIJ*)(Msub)->data; 3545 ii = aij->i; 3546 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3547 3548 /* 3549 m - number of local rows 3550 Ncols - number of columns (same on all processors) 3551 rstart - first row in new global matrix generated 3552 */ 3553 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3554 3555 if (call == MAT_INITIAL_MATRIX) { 3556 /* (4) Create parallel newmat */ 3557 PetscMPIInt rank,size; 3558 PetscInt csize; 3559 3560 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3561 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3562 3563 /* 3564 Determine the number of non-zeros in the diagonal and off-diagonal 3565 portions of the matrix in order to do correct preallocation 3566 */ 3567 3568 /* first get start and end of "diagonal" columns */ 3569 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3570 if (csize == PETSC_DECIDE) { 3571 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3572 if (mglobal == Ncols) { /* square matrix */ 3573 nlocal = m; 3574 } else { 3575 nlocal = Ncols/size + ((Ncols % size) > rank); 3576 } 3577 } else { 3578 nlocal = csize; 3579 } 3580 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3581 rstart = rend - nlocal; 3582 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3583 3584 /* next, compute all the lengths */ 3585 jj = aij->j; 3586 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3587 olens = dlens + m; 3588 for (i=0; i<m; i++) { 3589 jend = ii[i+1] - ii[i]; 3590 olen = 0; 3591 dlen = 0; 3592 for (j=0; j<jend; j++) { 3593 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3594 else dlen++; 3595 jj++; 3596 } 3597 olens[i] = olen; 3598 dlens[i] = dlen; 3599 } 3600 3601 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3602 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3603 3604 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3605 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
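      /* bs/cbs are the block sizes of isrow/iscol obtained above; dlens/olens give an exact preallocation for M */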
3606 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3607 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3608 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3609 ierr = PetscFree(dlens);CHKERRQ(ierr); 3610 3611 } else { /* call == MAT_REUSE_MATRIX */ 3612 M = *newmat; 3613 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3614 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3615 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3616 /* 3617 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3618 rather than the slower MatSetValues(). 3619 */ 3620 M->was_assembled = PETSC_TRUE; 3621 M->assembled = PETSC_FALSE; 3622 } 3623 3624 /* (5) Set values of Msub to *newmat */ 3625 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3626 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3627 3628 jj = aij->j; 3629 aa = aij->a; 3630 for (i=0; i<m; i++) { 3631 row = rstart + i; 3632 nz = ii[i+1] - ii[i]; 3633 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3634 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3635 jj += nz; aa += nz; 3636 } 3637 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3638 3639 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3640 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3641 3642 ierr = PetscFree(colsub);CHKERRQ(ierr); 3643 3644 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3645 if (call == MAT_INITIAL_MATRIX) { 3646 *newmat = M; 3647 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3648 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3649 3650 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3651 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3652 3653 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3654 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3655 3656 if (iscol_local) { 3657 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3658 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3659 } 3660 } 3661 PetscFunctionReturn(0); 3662 } 3663 3664 /* 3665 Not great since it makes two copies of the submatrix, first an SeqAIJ 3666 in local and then by concatenating the local matrices the end result. 3667 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3668 3669 Note: This requires a sequential iscol with all indices. 
3670 */ 3671 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3672 { 3673 PetscErrorCode ierr; 3674 PetscMPIInt rank,size; 3675 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3676 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3677 Mat M,Mreuse; 3678 MatScalar *aa,*vwork; 3679 MPI_Comm comm; 3680 Mat_SeqAIJ *aij; 3681 PetscBool colflag,allcolumns=PETSC_FALSE; 3682 3683 PetscFunctionBegin; 3684 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3685 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3686 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3687 3688 /* Check for special case: each processor gets entire matrix columns */ 3689 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3690 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3691 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3692 3693 if (call == MAT_REUSE_MATRIX) { 3694 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3695 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3696 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3697 } else { 3698 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3699 } 3700 3701 /* 3702 m - number of local rows 3703 n - number of columns (same on all processors) 3704 rstart - first row in new global matrix generated 3705 */ 3706 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3707 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3708 if (call == MAT_INITIAL_MATRIX) { 3709 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3710 ii = aij->i; 3711 jj = aij->j; 3712 3713 /* 3714 Determine the number of non-zeros in the diagonal and off-diagonal 3715 portions of the matrix in order to do correct preallocation 3716 */ 3717 3718 /* first get start and end of "diagonal" columns */ 3719 if (csize == PETSC_DECIDE) { 3720 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3721 if (mglobal == n) { /* square matrix */ 3722 nlocal = m; 3723 } else { 3724 nlocal = n/size + ((n % size) > rank); 3725 } 3726 } else { 3727 nlocal = csize; 3728 } 3729 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3730 rstart = rend - nlocal; 3731 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3732 3733 /* next, compute all the lengths */ 3734 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3735 olens = dlens + m; 3736 for (i=0; i<m; i++) { 3737 jend = ii[i+1] - ii[i]; 3738 olen = 0; 3739 dlen = 0; 3740 for (j=0; j<jend; j++) { 3741 if (*jj < rstart || *jj >= rend) olen++; 3742 else dlen++; 3743 jj++; 3744 } 3745 olens[i] = olen; 3746 dlens[i] = dlen; 3747 } 3748 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3749 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3750 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3751 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3752 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3753 ierr = PetscFree(dlens);CHKERRQ(ierr); 3754 } else { 3755 PetscInt ml,nl; 3756 3757 M = *newmat; 3758 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3759 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3760 ierr = 
MatZeroEntries(M);CHKERRQ(ierr); 3761 /* 3762 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3763 rather than the slower MatSetValues(). 3764 */ 3765 M->was_assembled = PETSC_TRUE; 3766 M->assembled = PETSC_FALSE; 3767 } 3768 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3769 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3770 ii = aij->i; 3771 jj = aij->j; 3772 aa = aij->a; 3773 for (i=0; i<m; i++) { 3774 row = rstart + i; 3775 nz = ii[i+1] - ii[i]; 3776 cwork = jj; jj += nz; 3777 vwork = aa; aa += nz; 3778 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3779 } 3780 3781 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3782 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3783 *newmat = M; 3784 3785 /* save submatrix used in processor for next request */ 3786 if (call == MAT_INITIAL_MATRIX) { 3787 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3788 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3789 } 3790 PetscFunctionReturn(0); 3791 } 3792 3793 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3794 { 3795 PetscInt m,cstart, cend,j,nnz,i,d; 3796 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3797 const PetscInt *JJ; 3798 PetscScalar *values; 3799 PetscErrorCode ierr; 3800 PetscBool nooffprocentries; 3801 3802 PetscFunctionBegin; 3803 if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3804 3805 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3806 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3807 m = B->rmap->n; 3808 cstart = B->cmap->rstart; 3809 cend = B->cmap->rend; 3810 rstart = B->rmap->rstart; 3811 3812 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3813 3814 #if defined(PETSC_USE_DEBUG) 3815 for (i=0; i<m; i++) { 3816 nnz = Ii[i+1]- Ii[i]; 3817 JJ = J + Ii[i]; 3818 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3819 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3820 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3821 } 3822 #endif 3823 3824 for (i=0; i<m; i++) { 3825 nnz = Ii[i+1]- Ii[i]; 3826 JJ = J + Ii[i]; 3827 nnz_max = PetscMax(nnz_max,nnz); 3828 d = 0; 3829 for (j=0; j<nnz; j++) { 3830 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3831 } 3832 d_nnz[i] = d; 3833 o_nnz[i] = nnz - d; 3834 } 3835 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3836 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3837 3838 if (v) values = (PetscScalar*)v; 3839 else { 3840 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3841 } 3842 3843 for (i=0; i<m; i++) { 3844 ii = i + rstart; 3845 nnz = Ii[i+1]- Ii[i]; 3846 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? 
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3847 } 3848 nooffprocentries = B->nooffprocentries; 3849 B->nooffprocentries = PETSC_TRUE; 3850 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3851 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3852 B->nooffprocentries = nooffprocentries; 3853 3854 if (!v) { 3855 ierr = PetscFree(values);CHKERRQ(ierr); 3856 } 3857 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3858 PetscFunctionReturn(0); 3859 } 3860 3861 /*@ 3862 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3863 (the default parallel PETSc format). 3864 3865 Collective on MPI_Comm 3866 3867 Input Parameters: 3868 + B - the matrix 3869 . i - the indices into j for the start of each local row (starts with zero) 3870 . j - the column indices for each local row (starts with zero) 3871 - v - optional values in the matrix 3872 3873 Level: developer 3874 3875 Notes: 3876 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3877 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3878 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3879 3880 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3881 3882 The format which is used for the sparse matrix input, is equivalent to a 3883 row-major ordering.. i.e for the following matrix, the input data expected is 3884 as shown 3885 3886 $ 1 0 0 3887 $ 2 0 3 P0 3888 $ ------- 3889 $ 4 5 6 P1 3890 $ 3891 $ Process0 [P0]: rows_owned=[0,1] 3892 $ i = {0,1,3} [size = nrow+1 = 2+1] 3893 $ j = {0,0,2} [size = 3] 3894 $ v = {1,2,3} [size = 3] 3895 $ 3896 $ Process1 [P1]: rows_owned=[2] 3897 $ i = {0,3} [size = nrow+1 = 1+1] 3898 $ j = {0,1,2} [size = 3] 3899 $ v = {4,5,6} [size = 3] 3900 3901 .keywords: matrix, aij, compressed row, sparse, parallel 3902 3903 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3904 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3905 @*/ 3906 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3907 { 3908 PetscErrorCode ierr; 3909 3910 PetscFunctionBegin; 3911 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3912 PetscFunctionReturn(0); 3913 } 3914 3915 /*@C 3916 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3917 (the default parallel PETSc format). For good matrix assembly performance 3918 the user should preallocate the matrix storage by setting the parameters 3919 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3920 performance can be increased by more than a factor of 50. 3921 3922 Collective on MPI_Comm 3923 3924 Input Parameters: 3925 + B - the matrix 3926 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3927 (same value is used for all local rows) 3928 . d_nnz - array containing the number of nonzeros in the various rows of the 3929 DIAGONAL portion of the local submatrix (possibly different for each row) 3930 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3931 The size of this array is equal to the number of local rows, i.e 'm'. 
3932 For matrices that will be factored, you must leave room for (and set) 3933 the diagonal entry even if it is zero. 3934 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3935 submatrix (same value is used for all local rows). 3936 - o_nnz - array containing the number of nonzeros in the various rows of the 3937 OFF-DIAGONAL portion of the local submatrix (possibly different for 3938 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3939 structure. The size of this array is equal to the number 3940 of local rows, i.e 'm'. 3941 3942 If the *_nnz parameter is given then the *_nz parameter is ignored 3943 3944 The AIJ format (also called the Yale sparse matrix format or 3945 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3946 storage. The stored row and column indices begin with zero. 3947 See Users-Manual: ch_mat for details. 3948 3949 The parallel matrix is partitioned such that the first m0 rows belong to 3950 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3951 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3952 3953 The DIAGONAL portion of the local submatrix of a processor can be defined 3954 as the submatrix which is obtained by extraction the part corresponding to 3955 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3956 first row that belongs to the processor, r2 is the last row belonging to 3957 the this processor, and c1-c2 is range of indices of the local part of a 3958 vector suitable for applying the matrix to. This is an mxn matrix. In the 3959 common case of a square matrix, the row and column ranges are the same and 3960 the DIAGONAL part is also square. The remaining portion of the local 3961 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3962 3963 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3964 3965 You can call MatGetInfo() to get information on how effective the preallocation was; 3966 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3967 You can also run with the option -info and look for messages with the string 3968 malloc in them to see if additional memory allocation was needed. 3969 3970 Example usage: 3971 3972 Consider the following 8x8 matrix with 34 non-zero values, that is 3973 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3974 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3975 as follows: 3976 3977 .vb 3978 1 2 0 | 0 3 0 | 0 4 3979 Proc0 0 5 6 | 7 0 0 | 8 0 3980 9 0 10 | 11 0 0 | 12 0 3981 ------------------------------------- 3982 13 0 14 | 15 16 17 | 0 0 3983 Proc1 0 18 0 | 19 20 21 | 0 0 3984 0 0 0 | 22 23 0 | 24 0 3985 ------------------------------------- 3986 Proc2 25 26 27 | 0 0 28 | 29 0 3987 30 0 0 | 31 32 33 | 0 34 3988 .ve 3989 3990 This can be represented as a collection of submatrices as: 3991 3992 .vb 3993 A B C 3994 D E F 3995 G H I 3996 .ve 3997 3998 Where the submatrices A,B,C are owned by proc0, D,E,F are 3999 owned by proc1, G,H,I are owned by proc2. 4000 4001 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4002 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4003 The 'M','N' parameters are 8,8, and have the same values on all procs. 4004 4005 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4006 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4007 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the
   local rows in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices (of length m+1; i[0] must be 0)
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and the i indices are offsets into the local j array.
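   A minimal calling sketch (names are illustrative; i, j, v are the per-process arrays from the
   example layout shown below, so m is 2 on process 0 and 1 on process 1, and N is 3):
.vb
   Mat A;
   MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,m,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
   /* ... use A ... */
   MatDestroy(&A);
.ve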
4086 4087 The format which is used for the sparse matrix input, is equivalent to a 4088 row-major ordering.. i.e for the following matrix, the input data expected is 4089 as shown 4090 4091 $ 1 0 0 4092 $ 2 0 3 P0 4093 $ ------- 4094 $ 4 5 6 P1 4095 $ 4096 $ Process0 [P0]: rows_owned=[0,1] 4097 $ i = {0,1,3} [size = nrow+1 = 2+1] 4098 $ j = {0,0,2} [size = 3] 4099 $ v = {1,2,3} [size = 3] 4100 $ 4101 $ Process1 [P1]: rows_owned=[2] 4102 $ i = {0,3} [size = nrow+1 = 1+1] 4103 $ j = {0,1,2} [size = 3] 4104 $ v = {4,5,6} [size = 3] 4105 4106 .keywords: matrix, aij, compressed row, sparse, parallel 4107 4108 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4109 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4110 @*/ 4111 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4112 { 4113 PetscErrorCode ierr; 4114 4115 PetscFunctionBegin; 4116 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4117 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4118 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4119 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4120 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4121 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4122 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4123 PetscFunctionReturn(0); 4124 } 4125 4126 /*@C 4127 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4128 (the default parallel PETSc format). For good matrix assembly performance 4129 the user should preallocate the matrix storage by setting the parameters 4130 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4131 performance can be increased by more than a factor of 50. 4132 4133 Collective on MPI_Comm 4134 4135 Input Parameters: 4136 + comm - MPI communicator 4137 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4138 This value should be the same as the local size used in creating the 4139 y vector for the matrix-vector product y = Ax. 4140 . n - This value should be the same as the local size used in creating the 4141 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4142 calculated if N is given) For square matrices n is almost always m. 4143 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4144 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4145 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4146 (same value is used for all local rows) 4147 . d_nnz - array containing the number of nonzeros in the various rows of the 4148 DIAGONAL portion of the local submatrix (possibly different for each row) 4149 or NULL, if d_nz is used to specify the nonzero structure. 4150 The size of this array is equal to the number of local rows, i.e 'm'. 4151 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4152 submatrix (same value is used for all local rows). 4153 - o_nnz - array containing the number of nonzeros in the various rows of the 4154 OFF-DIAGONAL portion of the local submatrix (possibly different for 4155 each row) or NULL, if o_nz is used to specify the nonzero 4156 structure. 
       The size of this array is equal to the number of local rows, i.e., 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2,... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   owned by the given processor, i.e., the diagonal submatrix on
   process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism
.vb
   MatCreate(...,&A);
   MatSetType(A,MATMPIAIJ);
   MatSetSizes(A, m,n,M,N);
   MatMPIAIJSetPreallocation(A,...);
.ve

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode - Do not use inodes
-  -mat_inode_limit <limit> - Sets inode limit (max limit=5)

   Example usage:

   Consider the following 8x8 matrix with 34 nonzero values, that is
   assembled across 3 processors. Let us assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows.
   This division can be shown as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.
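   Each process calls MatCreateAIJ() collectively with its own local sizes and preallocation
   arrays; for example, proc0 of the layout above could use either form below (a sketch; A,
   d_nnz and o_nnz are illustrative names holding the proc0 values listed above):
.vb
   PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
   Mat      A;
   MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);      /* per-row preallocation */
   /* or: MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,2,NULL,2,NULL,&A);    scalar d_nz and o_nz  */
.ve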
4295 4296 Level: intermediate 4297 4298 .keywords: matrix, aij, compressed row, sparse, parallel 4299 4300 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4301 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4302 @*/ 4303 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4304 { 4305 PetscErrorCode ierr; 4306 PetscMPIInt size; 4307 4308 PetscFunctionBegin; 4309 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4310 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4311 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4312 if (size > 1) { 4313 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4314 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4315 } else { 4316 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4317 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4318 } 4319 PetscFunctionReturn(0); 4320 } 4321 4322 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4323 { 4324 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4325 PetscBool flg; 4326 PetscErrorCode ierr; 4327 4328 PetscFunctionBegin; 4329 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr); 4330 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4331 if (Ad) *Ad = a->A; 4332 if (Ao) *Ao = a->B; 4333 if (colmap) *colmap = a->garray; 4334 PetscFunctionReturn(0); 4335 } 4336 4337 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4338 { 4339 PetscErrorCode ierr; 4340 PetscInt m,N,i,rstart,nnz,Ii; 4341 PetscInt *indx; 4342 PetscScalar *values; 4343 4344 PetscFunctionBegin; 4345 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4346 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4347 PetscInt *dnz,*onz,sum,bs,cbs; 4348 4349 if (n == PETSC_DECIDE) { 4350 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4351 } 4352 /* Check sum(n) = N */ 4353 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4354 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4355 4356 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4357 rstart -= m; 4358 4359 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4360 for (i=0; i<m; i++) { 4361 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4362 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4363 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4364 } 4365 4366 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4367 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4368 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4369 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4370 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4371 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4372 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4373 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4374 } 4375 4376 /* numeric phase */ 4377 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4378 for (i=0; i<m; i++) { 4379 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4380 Ii = i + rstart; 4381 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 
4382 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4383 } 4384 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4385 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4386 PetscFunctionReturn(0); 4387 } 4388 4389 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4390 { 4391 PetscErrorCode ierr; 4392 PetscMPIInt rank; 4393 PetscInt m,N,i,rstart,nnz; 4394 size_t len; 4395 const PetscInt *indx; 4396 PetscViewer out; 4397 char *name; 4398 Mat B; 4399 const PetscScalar *values; 4400 4401 PetscFunctionBegin; 4402 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4403 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4404 /* Should this be the type of the diagonal block of A? */ 4405 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4406 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4407 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4408 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4409 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4410 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4411 for (i=0; i<m; i++) { 4412 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4413 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4414 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4415 } 4416 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4417 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4418 4419 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4420 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4421 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4422 sprintf(name,"%s.%d",outfile,rank); 4423 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4424 ierr = PetscFree(name);CHKERRQ(ierr); 4425 ierr = MatView(B,out);CHKERRQ(ierr); 4426 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4427 ierr = MatDestroy(&B);CHKERRQ(ierr); 4428 PetscFunctionReturn(0); 4429 } 4430 4431 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4432 { 4433 PetscErrorCode ierr; 4434 Mat_Merge_SeqsToMPI *merge; 4435 PetscContainer container; 4436 4437 PetscFunctionBegin; 4438 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4439 if (container) { 4440 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4441 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4442 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4443 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4444 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4445 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4446 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4447 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4448 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4449 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4450 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4451 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4452 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4453 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4454 ierr = PetscFree(merge);CHKERRQ(ierr); 4455 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4456 } 4457 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4458 PetscFunctionReturn(0); 4459 } 4460 4461 #include <../src/mat/utils/freespace.h> 4462 #include <petscbt.h> 4463 4464 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4465 { 4466 PetscErrorCode ierr; 4467 MPI_Comm comm; 4468 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4469 PetscMPIInt 
size,rank,taga,*len_s; 4470 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4471 PetscInt proc,m; 4472 PetscInt **buf_ri,**buf_rj; 4473 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4474 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4475 MPI_Request *s_waits,*r_waits; 4476 MPI_Status *status; 4477 MatScalar *aa=a->a; 4478 MatScalar **abuf_r,*ba_i; 4479 Mat_Merge_SeqsToMPI *merge; 4480 PetscContainer container; 4481 4482 PetscFunctionBegin; 4483 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4484 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4485 4486 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4487 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4488 4489 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4490 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4491 4492 bi = merge->bi; 4493 bj = merge->bj; 4494 buf_ri = merge->buf_ri; 4495 buf_rj = merge->buf_rj; 4496 4497 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4498 owners = merge->rowmap->range; 4499 len_s = merge->len_s; 4500 4501 /* send and recv matrix values */ 4502 /*-----------------------------*/ 4503 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4504 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4505 4506 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4507 for (proc=0,k=0; proc<size; proc++) { 4508 if (!len_s[proc]) continue; 4509 i = owners[proc]; 4510 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4511 k++; 4512 } 4513 4514 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4515 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4516 ierr = PetscFree(status);CHKERRQ(ierr); 4517 4518 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4519 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4520 4521 /* insert mat values of mpimat */ 4522 /*----------------------------*/ 4523 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4524 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4525 4526 for (k=0; k<merge->nrecv; k++) { 4527 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4528 nrows = *(buf_ri_k[k]); 4529 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4530 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4531 } 4532 4533 /* set values of ba */ 4534 m = merge->rowmap->n; 4535 for (i=0; i<m; i++) { 4536 arow = owners[rank] + i; 4537 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4538 bnzi = bi[i+1] - bi[i]; 4539 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4540 4541 /* add local non-zero vals of this proc's seqmat into ba */ 4542 anzi = ai[arow+1] - ai[arow]; 4543 aj = a->j + ai[arow]; 4544 aa = a->a + ai[arow]; 4545 nextaj = 0; 4546 for (j=0; nextaj<anzi; j++) { 4547 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4548 ba_i[j] += aa[nextaj++]; 4549 } 4550 } 4551 4552 /* add received vals into ba */ 4553 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4554 /* i-th row */ 4555 if (i == *nextrow[k]) { 4556 anzi = *(nextai[k]+1) - *nextai[k]; 4557 aj = buf_rj[k] + *(nextai[k]); 4558 aa = abuf_r[k] + *(nextai[k]); 4559 nextaj = 0; 4560 for (j=0; nextaj<anzi; j++) { 4561 if (*(bj_i + j) == aj[nextaj]) { /* bcol 
== acol */ 4562 ba_i[j] += aa[nextaj++]; 4563 } 4564 } 4565 nextrow[k]++; nextai[k]++; 4566 } 4567 } 4568 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4569 } 4570 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4571 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4572 4573 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4574 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4575 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4576 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4577 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4578 PetscFunctionReturn(0); 4579 } 4580 4581 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4582 { 4583 PetscErrorCode ierr; 4584 Mat B_mpi; 4585 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4586 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4587 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4588 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4589 PetscInt len,proc,*dnz,*onz,bs,cbs; 4590 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4591 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4592 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4593 MPI_Status *status; 4594 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4595 PetscBT lnkbt; 4596 Mat_Merge_SeqsToMPI *merge; 4597 PetscContainer container; 4598 4599 PetscFunctionBegin; 4600 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4601 4602 /* make sure it is a PETSc comm */ 4603 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4604 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4605 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4606 4607 ierr = PetscNew(&merge);CHKERRQ(ierr); 4608 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4609 4610 /* determine row ownership */ 4611 /*---------------------------------------------------------*/ 4612 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4613 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4614 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4615 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4616 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4617 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4618 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4619 4620 m = merge->rowmap->n; 4621 owners = merge->rowmap->range; 4622 4623 /* determine the number of messages to send, their lengths */ 4624 /*---------------------------------------------------------*/ 4625 len_s = merge->len_s; 4626 4627 len = 0; /* length of buf_si[] */ 4628 merge->nsend = 0; 4629 for (proc=0; proc<size; proc++) { 4630 len_si[proc] = 0; 4631 if (proc == rank) { 4632 len_s[proc] = 0; 4633 } else { 4634 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4635 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4636 } 4637 if (len_s[proc]) { 4638 merge->nsend++; 4639 nrows = 0; 4640 for (i=owners[proc]; i<owners[proc+1]; i++) { 4641 if (ai[i+1] > ai[i]) nrows++; 4642 } 4643 len_si[proc] = 2*(nrows+1); 4644 len += len_si[proc]; 4645 } 4646 } 4647 4648 /* determine the number and length of messages to receive for ij-structure */ 4649 /*-------------------------------------------------------------------------*/ 4650 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4651 ierr = 
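/* for each of the merge->nrecv incoming messages, record the sending rank (merge->id_r) together with the length of its j-structure (merge->len_r) and of its i-structure (len_ri) */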
PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4652 4653 /* post the Irecv of j-structure */ 4654 /*-------------------------------*/ 4655 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4656 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4657 4658 /* post the Isend of j-structure */ 4659 /*--------------------------------*/ 4660 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4661 4662 for (proc=0, k=0; proc<size; proc++) { 4663 if (!len_s[proc]) continue; 4664 i = owners[proc]; 4665 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4666 k++; 4667 } 4668 4669 /* receives and sends of j-structure are complete */ 4670 /*------------------------------------------------*/ 4671 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4672 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4673 4674 /* send and recv i-structure */ 4675 /*---------------------------*/ 4676 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4677 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4678 4679 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4680 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4681 for (proc=0,k=0; proc<size; proc++) { 4682 if (!len_s[proc]) continue; 4683 /* form outgoing message for i-structure: 4684 buf_si[0]: nrows to be sent 4685 [1:nrows]: row index (global) 4686 [nrows+1:2*nrows+1]: i-structure index 4687 */ 4688 /*-------------------------------------------*/ 4689 nrows = len_si[proc]/2 - 1; 4690 buf_si_i = buf_si + nrows+1; 4691 buf_si[0] = nrows; 4692 buf_si_i[0] = 0; 4693 nrows = 0; 4694 for (i=owners[proc]; i<owners[proc+1]; i++) { 4695 anzi = ai[i+1] - ai[i]; 4696 if (anzi) { 4697 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4698 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4699 nrows++; 4700 } 4701 } 4702 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4703 k++; 4704 buf_si += len_si[proc]; 4705 } 4706 4707 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4708 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4709 4710 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4711 for (i=0; i<merge->nrecv; i++) { 4712 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4713 } 4714 4715 ierr = PetscFree(len_si);CHKERRQ(ierr); 4716 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4717 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4718 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4719 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4720 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4721 ierr = PetscFree(status);CHKERRQ(ierr); 4722 4723 /* compute a local seq matrix in each processor */ 4724 /*----------------------------------------------*/ 4725 /* allocate bi array and free space for accumulating nonzero column info */ 4726 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4727 bi[0] = 0; 4728 4729 /* create and initialize a linked list */ 4730 nlnk = N+1; 4731 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4732 4733 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4734 len = ai[owners[rank+1]] - 
ai[owners[rank]]; 4735 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4736 4737 current_space = free_space; 4738 4739 /* determine symbolic info for each local row */ 4740 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4741 4742 for (k=0; k<merge->nrecv; k++) { 4743 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4744 nrows = *buf_ri_k[k]; 4745 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4746 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4747 } 4748 4749 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4750 len = 0; 4751 for (i=0; i<m; i++) { 4752 bnzi = 0; 4753 /* add local non-zero cols of this proc's seqmat into lnk */ 4754 arow = owners[rank] + i; 4755 anzi = ai[arow+1] - ai[arow]; 4756 aj = a->j + ai[arow]; 4757 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4758 bnzi += nlnk; 4759 /* add received col data into lnk */ 4760 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4761 if (i == *nextrow[k]) { /* i-th row */ 4762 anzi = *(nextai[k]+1) - *nextai[k]; 4763 aj = buf_rj[k] + *nextai[k]; 4764 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4765 bnzi += nlnk; 4766 nextrow[k]++; nextai[k]++; 4767 } 4768 } 4769 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4770 4771 /* if free space is not available, make more free space */ 4772 if (current_space->local_remaining<bnzi) { 4773 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4774 nspacedouble++; 4775 } 4776 /* copy data into free space, then initialize lnk */ 4777 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4778 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4779 4780 current_space->array += bnzi; 4781 current_space->local_used += bnzi; 4782 current_space->local_remaining -= bnzi; 4783 4784 bi[i+1] = bi[i] + bnzi; 4785 } 4786 4787 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4788 4789 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4790 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4791 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4792 4793 /* create symbolic parallel matrix B_mpi */ 4794 /*---------------------------------------*/ 4795 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4796 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4797 if (n==PETSC_DECIDE) { 4798 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4799 } else { 4800 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4801 } 4802 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4803 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4804 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4805 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4806 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4807 4808 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4809 B_mpi->assembled = PETSC_FALSE; 4810 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4811 merge->bi = bi; 4812 merge->bj = bj; 4813 merge->buf_ri = buf_ri; 4814 merge->buf_rj = buf_rj; 4815 merge->coi = NULL; 4816 merge->coj = NULL; 4817 merge->owners_co = NULL; 4818 4819 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4820 4821 /* attach the 
supporting struct to B_mpi for reuse */ 4822 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4823 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4824 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4825 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4826 *mpimat = B_mpi; 4827 4828 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4829 PetscFunctionReturn(0); 4830 } 4831 4832 /*@C 4833 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4834 matrices from each processor 4835 4836 Collective on MPI_Comm 4837 4838 Input Parameters: 4839 + comm - the communicator the parallel matrix will live on 4840 . seqmat - the input sequential matrix 4841 . m - number of local rows (or PETSC_DECIDE) 4842 . n - number of local columns (or PETSC_DECIDE) 4843 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4844 4845 Output Parameter: 4846 . mpimat - the parallel matrix generated 4847 4848 Level: advanced 4849 4850 Notes: 4851 The dimensions of the sequential matrix in each processor MUST be the same. 4852 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 4853 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4854 @*/ 4855 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4856 { 4857 PetscErrorCode ierr; 4858 PetscMPIInt size; 4859 4860 PetscFunctionBegin; 4861 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4862 if (size == 1) { 4863 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4864 if (scall == MAT_INITIAL_MATRIX) { 4865 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4866 } else { 4867 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4868 } 4869 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4870 PetscFunctionReturn(0); 4871 } 4872 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4873 if (scall == MAT_INITIAL_MATRIX) { 4874 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4875 } 4876 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4877 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4878 PetscFunctionReturn(0); 4879 } 4880 4881 /*@ 4882 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4883 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4884 with MatGetSize() 4885 4886 Not Collective 4887 4888 Input Parameters: 4889 + A - the matrix 4890 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4891 4892 Output Parameter: 4893 .
A_loc - the local sequential matrix generated 4894 4895 Level: developer 4896 4897 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4898 4899 @*/ 4900 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4901 { 4902 PetscErrorCode ierr; 4903 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4904 Mat_SeqAIJ *mat,*a,*b; 4905 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4906 MatScalar *aa,*ba,*cam; 4907 PetscScalar *ca; 4908 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4909 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4910 PetscBool match; 4911 MPI_Comm comm; 4912 PetscMPIInt size; 4913 4914 PetscFunctionBegin; 4915 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4916 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4917 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4918 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4919 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 4920 4921 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4922 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4923 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4924 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4925 aa = a->a; ba = b->a; 4926 if (scall == MAT_INITIAL_MATRIX) { 4927 if (size == 1) { 4928 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 4929 PetscFunctionReturn(0); 4930 } 4931 4932 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 4933 ci[0] = 0; 4934 for (i=0; i<am; i++) { 4935 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4936 } 4937 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 4938 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 4939 k = 0; 4940 for (i=0; i<am; i++) { 4941 ncols_o = bi[i+1] - bi[i]; 4942 ncols_d = ai[i+1] - ai[i]; 4943 /* off-diagonal portion of A */ 4944 for (jo=0; jo<ncols_o; jo++) { 4945 col = cmap[*bj]; 4946 if (col >= cstart) break; 4947 cj[k] = col; bj++; 4948 ca[k++] = *ba++; 4949 } 4950 /* diagonal portion of A */ 4951 for (j=0; j<ncols_d; j++) { 4952 cj[k] = cstart + *aj++; 4953 ca[k++] = *aa++; 4954 } 4955 /* off-diagonal portion of A */ 4956 for (j=jo; j<ncols_o; j++) { 4957 cj[k] = cmap[*bj++]; 4958 ca[k++] = *ba++; 4959 } 4960 } 4961 /* put together the new matrix */ 4962 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4963 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4964 /* Since these are PETSc arrays, change flags to free them as necessary. 
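Setting free_a and free_ij below hands ownership of the ci, cj and ca arrays to the new SeqAIJ matrix, so they are released when A_loc is destroyed.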
*/ 4965 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4966 mat->free_a = PETSC_TRUE; 4967 mat->free_ij = PETSC_TRUE; 4968 mat->nonew = 0; 4969 } else if (scall == MAT_REUSE_MATRIX) { 4970 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4971 ci = mat->i; cj = mat->j; cam = mat->a; 4972 for (i=0; i<am; i++) { 4973 /* off-diagonal portion of A */ 4974 ncols_o = bi[i+1] - bi[i]; 4975 for (jo=0; jo<ncols_o; jo++) { 4976 col = cmap[*bj]; 4977 if (col >= cstart) break; 4978 *cam++ = *ba++; bj++; 4979 } 4980 /* diagonal portion of A */ 4981 ncols_d = ai[i+1] - ai[i]; 4982 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4983 /* off-diagonal portion of A */ 4984 for (j=jo; j<ncols_o; j++) { 4985 *cam++ = *ba++; bj++; 4986 } 4987 } 4988 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4989 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4990 PetscFunctionReturn(0); 4991 } 4992 4993 /*@C 4994 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 4995 4996 Not Collective 4997 4998 Input Parameters: 4999 + A - the matrix 5000 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5001 - row, col - index sets of rows and columns to extract (or NULL) 5002 5003 Output Parameter: 5004 . A_loc - the local sequential matrix generated 5005 5006 Level: developer 5007 5008 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5009 5010 @*/ 5011 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5012 { 5013 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5014 PetscErrorCode ierr; 5015 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5016 IS isrowa,iscola; 5017 Mat *aloc; 5018 PetscBool match; 5019 5020 PetscFunctionBegin; 5021 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5022 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5023 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5024 if (!row) { 5025 start = A->rmap->rstart; end = A->rmap->rend; 5026 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5027 } else { 5028 isrowa = *row; 5029 } 5030 if (!col) { 5031 start = A->cmap->rstart; 5032 cmap = a->garray; 5033 nzA = a->A->cmap->n; 5034 nzB = a->B->cmap->n; 5035 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5036 ncols = 0; 5037 for (i=0; i<nzB; i++) { 5038 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5039 else break; 5040 } 5041 imark = i; 5042 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5043 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5044 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5045 } else { 5046 iscola = *col; 5047 } 5048 if (scall != MAT_INITIAL_MATRIX) { 5049 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5050 aloc[0] = *A_loc; 5051 } 5052 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5053 *A_loc = aloc[0]; 5054 ierr = PetscFree(aloc);CHKERRQ(ierr); 5055 if (!row) { 5056 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5057 } 5058 if (!col) { 5059 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5060 } 5061 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5062 PetscFunctionReturn(0); 5063 } 5064 5065 /*@C 5066 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5067 5068 Collective on Mat 5069 5070 Input Parameters: 5071 + A,B - the matrices in mpiaij format 5072 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5073 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5074 5075 Output Parameter: 5076 + rowb, colb - index sets of rows and columns of B to extract 5077 - B_seq - the sequential matrix generated 5078 5079 Level: developer 5080 5081 @*/ 5082 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5083 { 5084 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5085 PetscErrorCode ierr; 5086 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5087 IS isrowb,iscolb; 5088 Mat *bseq=NULL; 5089 5090 PetscFunctionBegin; 5091 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5092 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5093 } 5094 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5095 5096 if (scall == MAT_INITIAL_MATRIX) { 5097 start = A->cmap->rstart; 5098 cmap = a->garray; 5099 nzA = a->A->cmap->n; 5100 nzB = a->B->cmap->n; 5101 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5102 ncols = 0; 5103 for (i=0; i<nzB; i++) { /* row < local row index */ 5104 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5105 else break; 5106 } 5107 imark = i; 5108 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5109 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5110 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5111 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5112 } else { 5113 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5114 isrowb = *rowb; iscolb = *colb; 5115 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5116 bseq[0] = *B_seq; 5117 } 5118 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5119 *B_seq = bseq[0]; 5120 ierr = PetscFree(bseq);CHKERRQ(ierr); 5121 if (!rowb) { 5122 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5123 } else { 5124 *rowb = isrowb; 5125 } 5126 if (!colb) { 5127 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5128 } else { 5129 *colb = iscolb; 5130 } 5131 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5132 PetscFunctionReturn(0); 5133 } 5134 5135 /* 5136 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5137 of the OFF-DIAGONAL portion of local A 5138 5139 Collective on Mat 5140 5141 Input Parameters: 5142 + A,B - the matrices in mpiaij format 5143 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5144 5145 Output Parameter: 5146 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5147 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5148 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5149 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5150 5151 Level: developer 5152 5153 */ 5154 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5155 { 5156 VecScatter_MPI_General *gen_to,*gen_from; 5157 PetscErrorCode ierr; 5158 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5159 Mat_SeqAIJ *b_oth; 5160 VecScatter ctx; 5161 MPI_Comm comm; 5162 PetscMPIInt *rprocs,*sprocs,tag,rank; 5163 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5164 PetscInt *rvalues,*svalues,*cols,sbs,rbs; 5165 PetscScalar *b_otha,*bufa,*bufA,*vals; 5166 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5167 MPI_Request *rwaits = NULL,*swaits = NULL; 5168 MPI_Status *sstatus,rstatus; 5169 PetscMPIInt jj,size; 5170 VecScatterType type; 5171 PetscBool mpi1; 5172 5173 PetscFunctionBegin; 5174 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5175 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5176 5177 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5178 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5179 } 5180 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5181 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5182 5183 if (size == 1) { 5184 startsj_s = NULL; 5185 bufa_ptr = NULL; 5186 *B_oth = NULL; 5187 PetscFunctionReturn(0); 5188 } 5189 5190 ctx = a->Mvctx; 5191 ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr); 5192 ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr); 5193 if (!mpi1) { 5194 /* a->Mvctx is not type MPI1 which is not implemented for Mat-Mat ops, 5195 thus create a->Mvctx_mpi1 */ 5196 if (!a->Mvctx_mpi1) { 5197 a->Mvctx_mpi1_flg = PETSC_TRUE; 5198 ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr); 5199 } 5200 ctx = a->Mvctx_mpi1; 5201 } 5202 tag = ((PetscObject)ctx)->tag; 5203 5204 gen_to = (VecScatter_MPI_General*)ctx->todata; 5205 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5206 nrecvs = gen_from->n; 5207 nsends = gen_to->n; 5208 5209 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5210 srow = gen_to->indices; /* local row index to be sent */ 5211 sstarts = gen_to->starts; 5212 sprocs = gen_to->procs; 5213 sstatus = gen_to->sstatus; 5214 sbs = gen_to->bs; 5215 rstarts = gen_from->starts; 5216 rprocs = gen_from->procs; 5217 rbs = gen_from->bs; 5218 5219 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5220 if (scall == MAT_INITIAL_MATRIX) { 5221 /* i-array */ 5222 /*---------*/ 5223 /* post receives */ 5224 ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr); 5225 for (i=0; i<nrecvs; i++) { 5226 rowlen = rvalues + rstarts[i]*rbs; 5227 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5228 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5229 } 5230 5231 /* pack the outgoing message */ 5232 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5233 5234 sstartsj[0] = 0; 5235 rstartsj[0] = 0; 5236 len = 0; /* total length of j or a array to be sent */ 5237 k = 0; 5238 ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr); 5239 for (i=0; i<nsends; i++) { 5240 rowlen = svalues + sstarts[i]*sbs; 5241 
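/* rowlen[] records, for each (block) row shipped to sprocs[i], the number of nonzeros in that row of B */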
nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5242 for (j=0; j<nrows; j++) { 5243 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5244 for (l=0; l<sbs; l++) { 5245 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5246 5247 rowlen[j*sbs+l] = ncols; 5248 5249 len += ncols; 5250 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5251 } 5252 k++; 5253 } 5254 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5255 5256 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5257 } 5258 /* recvs and sends of i-array are completed */ 5259 i = nrecvs; 5260 while (i--) { 5261 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5262 } 5263 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5264 ierr = PetscFree(svalues);CHKERRQ(ierr); 5265 5266 /* allocate buffers for sending j and a arrays */ 5267 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5268 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5269 5270 /* create i-array of B_oth */ 5271 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5272 5273 b_othi[0] = 0; 5274 len = 0; /* total length of j or a array to be received */ 5275 k = 0; 5276 for (i=0; i<nrecvs; i++) { 5277 rowlen = rvalues + rstarts[i]*rbs; 5278 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5279 for (j=0; j<nrows; j++) { 5280 b_othi[k+1] = b_othi[k] + rowlen[j]; 5281 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5282 k++; 5283 } 5284 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5285 } 5286 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5287 5288 /* allocate space for j and a arrrays of B_oth */ 5289 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5290 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5291 5292 /* j-array */ 5293 /*---------*/ 5294 /* post receives of j-array */ 5295 for (i=0; i<nrecvs; i++) { 5296 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5297 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5298 } 5299 5300 /* pack the outgoing message j-array */ 5301 k = 0; 5302 for (i=0; i<nsends; i++) { 5303 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5304 bufJ = bufj+sstartsj[i]; 5305 for (j=0; j<nrows; j++) { 5306 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5307 for (ll=0; ll<sbs; ll++) { 5308 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5309 for (l=0; l<ncols; l++) { 5310 *bufJ++ = cols[l]; 5311 } 5312 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5313 } 5314 } 5315 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5316 } 5317 5318 /* recvs and sends of j-array are completed */ 5319 i = nrecvs; 5320 while (i--) { 5321 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5322 } 5323 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5324 } else if (scall == MAT_REUSE_MATRIX) { 5325 sstartsj = *startsj_s; 5326 rstartsj = *startsj_r; 5327 bufa = *bufa_ptr; 5328 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5329 b_otha = b_oth->a; 5330 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5331 5332 /* a-array */ 5333 /*---------*/ 5334 /* post receives of a-array */ 5335 for (i=0; i<nrecvs; i++) { 5336 nrows = rstartsj[i+1]-rstartsj[i]; /* length 
of the msg received */ 5337 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5338 } 5339 5340 /* pack the outgoing message a-array */ 5341 k = 0; 5342 for (i=0; i<nsends; i++) { 5343 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5344 bufA = bufa+sstartsj[i]; 5345 for (j=0; j<nrows; j++) { 5346 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5347 for (ll=0; ll<sbs; ll++) { 5348 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5349 for (l=0; l<ncols; l++) { 5350 *bufA++ = vals[l]; 5351 } 5352 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5353 } 5354 } 5355 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5356 } 5357 /* recvs and sends of a-array are completed */ 5358 i = nrecvs; 5359 while (i--) { 5360 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5361 } 5362 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5363 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5364 5365 if (scall == MAT_INITIAL_MATRIX) { 5366 /* put together the new matrix */ 5367 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5368 5369 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5370 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5371 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5372 b_oth->free_a = PETSC_TRUE; 5373 b_oth->free_ij = PETSC_TRUE; 5374 b_oth->nonew = 0; 5375 5376 ierr = PetscFree(bufj);CHKERRQ(ierr); 5377 if (!startsj_s || !bufa_ptr) { 5378 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5379 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5380 } else { 5381 *startsj_s = sstartsj; 5382 *startsj_r = rstartsj; 5383 *bufa_ptr = bufa; 5384 } 5385 } 5386 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5387 PetscFunctionReturn(0); 5388 } 5389 5390 /*@C 5391 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5392 5393 Not Collective 5394 5395 Input Parameters: 5396 . A - The matrix in mpiaij format 5397 5398 Output Parameter: 5399 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5400 . 
colmap - A map from global column index to local index into lvec 5401 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5402 5403 Level: developer 5404 5405 @*/ 5406 #if defined(PETSC_USE_CTABLE) 5407 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5408 #else 5409 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5410 #endif 5411 { 5412 Mat_MPIAIJ *a; 5413 5414 PetscFunctionBegin; 5415 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5416 PetscValidPointer(lvec, 2); 5417 PetscValidPointer(colmap, 3); 5418 PetscValidPointer(multScatter, 4); 5419 a = (Mat_MPIAIJ*) A->data; 5420 if (lvec) *lvec = a->lvec; 5421 if (colmap) *colmap = a->colmap; 5422 if (multScatter) *multScatter = a->Mvctx; 5423 PetscFunctionReturn(0); 5424 } 5425 5426 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5427 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5428 #if defined(PETSC_HAVE_MKL_SPARSE) 5429 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5430 #endif 5431 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5432 #if defined(PETSC_HAVE_ELEMENTAL) 5433 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5434 #endif 5435 #if defined(PETSC_HAVE_HYPRE) 5436 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5437 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*); 5438 #endif 5439 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*); 5440 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5441 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 5442 5443 /* 5444 Computes (B'*A')' since computing B*A directly is untenable 5445 5446 n p p 5447 ( ) ( ) ( ) 5448 m ( A ) * n ( B ) = m ( C ) 5449 ( ) ( ) ( ) 5450 5451 */ 5452 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5453 { 5454 PetscErrorCode ierr; 5455 Mat At,Bt,Ct; 5456 5457 PetscFunctionBegin; 5458 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5459 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5460 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5461 ierr = MatDestroy(&At);CHKERRQ(ierr); 5462 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5463 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5464 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5465 PetscFunctionReturn(0); 5466 } 5467 5468 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5469 { 5470 PetscErrorCode ierr; 5471 PetscInt m=A->rmap->n,n=B->cmap->n; 5472 Mat Cmat; 5473 5474 PetscFunctionBegin; 5475 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5476 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5477 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5478 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5479 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5480 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5481 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5482 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5483 5484 Cmat->ops->matmultnumeric = 
MatMatMultNumeric_MPIDense_MPIAIJ; 5485 5486 *C = Cmat; 5487 PetscFunctionReturn(0); 5488 } 5489 5490 /* ----------------------------------------------------------------*/ 5491 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5492 { 5493 PetscErrorCode ierr; 5494 5495 PetscFunctionBegin; 5496 if (scall == MAT_INITIAL_MATRIX) { 5497 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5498 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5499 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5500 } 5501 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5502 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5503 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5504 PetscFunctionReturn(0); 5505 } 5506 5507 /*MC 5508 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5509 5510 Options Database Keys: 5511 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5512 5513 Level: beginner 5514 5515 .seealso: MatCreateAIJ() 5516 M*/ 5517 5518 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5519 { 5520 Mat_MPIAIJ *b; 5521 PetscErrorCode ierr; 5522 PetscMPIInt size; 5523 5524 PetscFunctionBegin; 5525 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5526 5527 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5528 B->data = (void*)b; 5529 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5530 B->assembled = PETSC_FALSE; 5531 B->insertmode = NOT_SET_VALUES; 5532 b->size = size; 5533 5534 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5535 5536 /* build cache for off array entries formed */ 5537 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5538 5539 b->donotstash = PETSC_FALSE; 5540 b->colmap = 0; 5541 b->garray = 0; 5542 b->roworiented = PETSC_TRUE; 5543 5544 /* stuff used for matrix vector multiply */ 5545 b->lvec = NULL; 5546 b->Mvctx = NULL; 5547 5548 /* stuff for MatGetRow() */ 5549 b->rowindices = 0; 5550 b->rowvalues = 0; 5551 b->getrowactive = PETSC_FALSE; 5552 5553 /* flexible pointer used in CUSP/CUSPARSE classes */ 5554 b->spptr = NULL; 5555 5556 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5557 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5558 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5559 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5560 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5561 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 5562 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5563 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5564 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5565 #if defined(PETSC_HAVE_MKL_SPARSE) 5566 ierr = 
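/* the conversion to the MKL-backed AIJ subtype is only registered when PETSc was configured with MKL sparse support */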
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 5567 #endif 5568 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5569 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5570 #if defined(PETSC_HAVE_ELEMENTAL) 5571 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5572 #endif 5573 #if defined(PETSC_HAVE_HYPRE) 5574 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 5575 #endif 5576 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr); 5577 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 5578 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5579 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5580 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5581 #if defined(PETSC_HAVE_HYPRE) 5582 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr); 5583 #endif 5584 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 5585 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5586 PetscFunctionReturn(0); 5587 } 5588 5589 /*@C 5590 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5591 and "off-diagonal" part of the matrix in CSR format. 5592 5593 Collective on MPI_Comm 5594 5595 Input Parameters: 5596 + comm - MPI communicator 5597 . m - number of local rows (Cannot be PETSC_DECIDE) 5598 . n - This value should be the same as the local size used in creating the 5599 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5600 calculated if N is given) For square matrices n is almost always m. 5601 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5602 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5603 . i - row indices for "diagonal" portion of matrix 5604 . j - column indices 5605 . a - matrix values 5606 . oi - row indices for "off-diagonal" portion of matrix 5607 . oj - column indices 5608 - oa - matrix values 5609 5610 Output Parameter: 5611 . mat - the matrix 5612 5613 Level: advanced 5614 5615 Notes: 5616 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5617 must free the arrays once the matrix has been destroyed and not before. 5618 5619 The i and j indices are 0 based 5620 5621 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5622 5623 This sets local rows and cannot be used to set off-processor values. 5624 5625 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5626 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5627 not easily support in-place reassembly. 
It is recommended to use MatSetValues() (or a variant thereof) because 5628 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5629 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5630 communication if it is known that only local entries will be set. 5631 5632 .keywords: matrix, aij, compressed row, sparse, parallel 5633 5634 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5635 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5636 @*/ 5637 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5638 { 5639 PetscErrorCode ierr; 5640 Mat_MPIAIJ *maij; 5641 5642 PetscFunctionBegin; 5643 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5644 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5645 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5646 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5647 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5648 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5649 maij = (Mat_MPIAIJ*) (*mat)->data; 5650 5651 (*mat)->preallocated = PETSC_TRUE; 5652 5653 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5654 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5655 5656 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5657 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5658 5659 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5660 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5661 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5662 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5663 5664 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 5665 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5666 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5667 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 5668 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5669 PetscFunctionReturn(0); 5670 } 5671 5672 /* 5673 Special version for direct calls from Fortran 5674 */ 5675 #include <petsc/private/fortranimpl.h> 5676 5677 /* Change these macros so can be used in void function */ 5678 #undef CHKERRQ 5679 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5680 #undef SETERRQ2 5681 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5682 #undef SETERRQ3 5683 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5684 #undef SETERRQ 5685 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5686 5687 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5688 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5689 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5690 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5691 #else 5692 #endif 5693 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5694 { 5695 Mat mat = *mmat; 5696 PetscInt m = *mm, n = *mn; 5697 InsertMode addv = 
*maddv; 5698 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5699 PetscScalar value; 5700 PetscErrorCode ierr; 5701 5702 MatCheckPreallocated(mat,1); 5703 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5704 5705 #if defined(PETSC_USE_DEBUG) 5706 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5707 #endif 5708 { 5709 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5710 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5711 PetscBool roworiented = aij->roworiented; 5712 5713 /* Some Variables required in the macro */ 5714 Mat A = aij->A; 5715 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5716 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5717 MatScalar *aa = a->a; 5718 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5719 Mat B = aij->B; 5720 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5721 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5722 MatScalar *ba = b->a; 5723 5724 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5725 PetscInt nonew = a->nonew; 5726 MatScalar *ap1,*ap2; 5727 5728 PetscFunctionBegin; 5729 for (i=0; i<m; i++) { 5730 if (im[i] < 0) continue; 5731 #if defined(PETSC_USE_DEBUG) 5732 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5733 #endif 5734 if (im[i] >= rstart && im[i] < rend) { 5735 row = im[i] - rstart; 5736 lastcol1 = -1; 5737 rp1 = aj + ai[row]; 5738 ap1 = aa + ai[row]; 5739 rmax1 = aimax[row]; 5740 nrow1 = ailen[row]; 5741 low1 = 0; 5742 high1 = nrow1; 5743 lastcol2 = -1; 5744 rp2 = bj + bi[row]; 5745 ap2 = ba + bi[row]; 5746 rmax2 = bimax[row]; 5747 nrow2 = bilen[row]; 5748 low2 = 0; 5749 high2 = nrow2; 5750 5751 for (j=0; j<n; j++) { 5752 if (roworiented) value = v[i*n+j]; 5753 else value = v[i+j*m]; 5754 if (in[j] >= cstart && in[j] < cend) { 5755 col = in[j] - cstart; 5756 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 5757 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 5758 } else if (in[j] < 0) continue; 5759 #if defined(PETSC_USE_DEBUG) 5760 /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */ 5761 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 5762 #endif 5763 else { 5764 if (mat->was_assembled) { 5765 if (!aij->colmap) { 5766 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5767 } 5768 #if defined(PETSC_USE_CTABLE) 5769 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5770 col--; 5771 #else 5772 col = aij->colmap[in[j]] - 1; 5773 #endif 5774 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 5775 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5776 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5777 col = in[j]; 5778 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5779 B = aij->B; 5780 b = (Mat_SeqAIJ*)B->data; 5781 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5782 rp2 = bj + bi[row]; 5783 ap2 = ba + bi[row]; 5784 rmax2 = bimax[row]; 5785 nrow2 = bilen[row]; 5786 low2 = 0; 5787 high2 = nrow2; 5788 bm = aij->B->rmap->n; 5789 ba = b->a; 5790 } 5791 } else col = in[j]; 5792 
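/* the column falls outside this process's diagonal block, so the entry is inserted into the off-diagonal matrix B */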
MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 5793 } 5794 } 5795 } else if (!aij->donotstash) { 5796 if (roworiented) { 5797 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5798 } else { 5799 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5800 } 5801 } 5802 } 5803 } 5804 PetscFunctionReturnVoid(); 5805 } 5806 5807
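/*
   Illustrative sketch (not part of the library build): assembling a 1D Laplacian as a MATMPIAIJ
   matrix with MatCreateAIJ() and MatSetValues(), the assembly style recommended in the notes for
   MatCreateMPIAIJWithSplitArrays() above. The matrix size and preallocation numbers are made up
   for the example.

   #include <petscmat.h>

   int main(int argc,char **argv)
   {
     Mat            A;
     PetscErrorCode ierr;
     PetscInt       i,rstart,rend,N = 100,cols[3];
     PetscScalar    vals[3] = {-1.0,2.0,-1.0};

     ierr = PetscInitialize(&argc,&argv,NULL,NULL);if (ierr) return ierr;
     ierr = MatCreateAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,N,N,3,NULL,1,NULL,&A);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (i=rstart; i<rend; i++) {
       cols[0] = i-1; cols[1] = i; cols[2] = i+1;
       if (i == 0)        {ierr = MatSetValues(A,1,&i,2,cols+1,vals+1,INSERT_VALUES);CHKERRQ(ierr);}
       else if (i == N-1) {ierr = MatSetValues(A,1,&i,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);}
       else               {ierr = MatSetValues(A,1,&i,3,cols,vals,INSERT_VALUES);CHKERRQ(ierr);}
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatDestroy(&A);CHKERRQ(ierr);
     ierr = PetscFinalize();
     return ierr;
   }
*/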