#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity; a minimal usage sketch appears in a comment
   further below.

   Options Database Keys:
.  -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes:
   Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also switches
   over automatically to use inode routines when enough inodes exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
.  -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
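/*
   Usage sketch for the MATAIJ man page above (illustrative only, not part of the PETSc source):
   it shows why calling both preallocation routines is harmless, since only the routine matching
   the communicator size has any effect.  The names comm, mlocal, nlocal and the per-row estimates
   (5 diagonal / 2 off-diagonal nonzeros per row) are assumptions made for the example.
*/
#if 0
  Mat            A;
  PetscErrorCode ierr;

  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,mlocal,nlocal,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);                        /* MATSEQAIJ on one process, MATMPIAIJ otherwise */
  ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);         /* used only on a single-process communicator */
  ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);  /* used only on a multi-process communicator */
#endif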
} 115 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 116 PetscFunctionReturn(0); 117 } 118 119 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 120 { 121 PetscErrorCode ierr; 122 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 123 124 PetscFunctionBegin; 125 if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) { 126 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 127 } else { 128 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 129 } 130 PetscFunctionReturn(0); 131 } 132 133 134 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 135 { 136 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 137 PetscErrorCode ierr; 138 PetscInt i,rstart,nrows,*rows; 139 140 PetscFunctionBegin; 141 *zrows = NULL; 142 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 143 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 144 for (i=0; i<nrows; i++) rows[i] += rstart; 145 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 146 PetscFunctionReturn(0); 147 } 148 149 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 150 { 151 PetscErrorCode ierr; 152 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 153 PetscInt i,n,*garray = aij->garray; 154 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 155 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 156 PetscReal *work; 157 158 PetscFunctionBegin; 159 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 160 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 161 if (type == NORM_2) { 162 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 163 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 164 } 165 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 166 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 167 } 168 } else if (type == NORM_1) { 169 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 170 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 171 } 172 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 173 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 174 } 175 } else if (type == NORM_INFINITY) { 176 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 177 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 178 } 179 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 180 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 181 } 182 183 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 184 if (type == NORM_INFINITY) { 185 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 186 } else { 187 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 188 } 189 ierr = PetscFree(work);CHKERRQ(ierr); 190 if (type == NORM_2) { 191 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 192 } 193 PetscFunctionReturn(0); 194 } 195 196 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 197 { 198 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 199 IS sis,gis; 200 PetscErrorCode ierr; 201 const PetscInt *isis,*igis; 202 PetscInt n,*iis,nsis,ngis,rstart,i; 203 204 PetscFunctionBegin; 205 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 206 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 207 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 208 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 209 
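/*
   Worked example of the storage convention used by the routines above (the numbers are
   illustrative): consider a 4x8 global matrix on two processes, with rows 0-1 and columns 0-3
   owned by rank 0 and rows 2-3 and columns 4-7 owned by rank 1.  On rank 0 the "diagonal" block
   A holds the entries whose global column lies in 0-3 (stored with local column = global column
   minus cmap->rstart), while the "off-diagonal" block B holds the remaining entries compressed
   to the global columns that actually occur; if those are columns 5 and 7 then garray = {5,7}
   and a local B column index j refers to global column garray[j].  This is why
   MatGetColumnNorms_MPIAIJ() accumulates A entries into work[A->cmap->rstart + a_aij->j[i]] and
   B entries into work[garray[b_aij->j[i]]].
*/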
ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 210 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 211 212 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 213 ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr); 214 ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr); 215 n = ngis + nsis; 216 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 217 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 218 for (i=0; i<n; i++) iis[i] += rstart; 219 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 220 221 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 222 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 223 ierr = ISDestroy(&sis);CHKERRQ(ierr); 224 ierr = ISDestroy(&gis);CHKERRQ(ierr); 225 PetscFunctionReturn(0); 226 } 227 228 /* 229 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 230 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 231 232 Only for square matrices 233 234 Used by a preconditioner, hence PETSC_EXTERN 235 */ 236 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 237 { 238 PetscMPIInt rank,size; 239 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 240 PetscErrorCode ierr; 241 Mat mat; 242 Mat_SeqAIJ *gmata; 243 PetscMPIInt tag; 244 MPI_Status status; 245 PetscBool aij; 246 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 247 248 PetscFunctionBegin; 249 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 250 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 251 if (!rank) { 252 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 253 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 254 } 255 if (reuse == MAT_INITIAL_MATRIX) { 256 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 257 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 258 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 259 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 260 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 261 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 262 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 263 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 264 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 265 266 rowners[0] = 0; 267 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 268 rstart = rowners[rank]; 269 rend = rowners[rank+1]; 270 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 271 if (!rank) { 272 gmata = (Mat_SeqAIJ*) gmat->data; 273 /* send row lengths to all processors */ 274 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 275 for (i=1; i<size; i++) { 276 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 277 } 278 /* determine number diagonal and off-diagonal counts */ 279 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 280 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 281 jj = 0; 282 for (i=0; i<m; i++) { 283 for (j=0; j<dlens[i]; j++) { 284 if (gmata->j[jj] < rstart) ld[i]++; 285 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 286 jj++; 287 } 288 } 289 /* send column indices to other processes */ 290 for (i=1; i<size; i++) { 291 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 292 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 293 
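/*
   Worked example of the ownership bookkeeping built above (sizes are illustrative): with
   size = 3 and local row counts m = 2, 3, 2, the MPI_Allgather() fills rowners[1..3] = {2,3,2}
   and the prefix sum turns this into rowners = {0,2,5,7}, so rank 1 owns global rows [2,5) with
   rstart = 2 and rend = 5.  The dlens[]/olens[] arrays then count, per local row, the entries
   whose global column lies inside/outside that range, which is exactly the d_nnz/o_nnz
   information MatMPIAIJSetPreallocation() expects below.
*/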
ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 294 } 295 296 /* send numerical values to other processes */ 297 for (i=1; i<size; i++) { 298 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 299 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 300 } 301 gmataa = gmata->a; 302 gmataj = gmata->j; 303 304 } else { 305 /* receive row lengths */ 306 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 307 /* receive column indices */ 308 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 309 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 310 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 311 /* determine number diagonal and off-diagonal counts */ 312 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 313 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 314 jj = 0; 315 for (i=0; i<m; i++) { 316 for (j=0; j<dlens[i]; j++) { 317 if (gmataj[jj] < rstart) ld[i]++; 318 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 319 jj++; 320 } 321 } 322 /* receive numerical values */ 323 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 324 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 325 } 326 /* set preallocation */ 327 for (i=0; i<m; i++) { 328 dlens[i] -= olens[i]; 329 } 330 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 331 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 332 333 for (i=0; i<m; i++) { 334 dlens[i] += olens[i]; 335 } 336 cnt = 0; 337 for (i=0; i<m; i++) { 338 row = rstart + i; 339 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 340 cnt += dlens[i]; 341 } 342 if (rank) { 343 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 344 } 345 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 346 ierr = PetscFree(rowners);CHKERRQ(ierr); 347 348 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 349 350 *inmat = mat; 351 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 352 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 353 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 354 mat = *inmat; 355 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 356 if (!rank) { 357 /* send numerical values to other processes */ 358 gmata = (Mat_SeqAIJ*) gmat->data; 359 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 360 gmataa = gmata->a; 361 for (i=1; i<size; i++) { 362 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 363 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 364 } 365 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 366 } else { 367 /* receive numerical values from process 0*/ 368 nz = Ad->nz + Ao->nz; 369 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 370 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 371 } 372 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 373 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 374 ad = Ad->a; 375 ao = Ao->a; 376 if (mat->rmap->n) { 377 i = 0; 378 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 379 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 380 } 381 for (i=1; i<mat->rmap->n; i++) { 382 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr 
= PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 383 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 384 } 385 i--; 386 if (mat->rmap->n) { 387 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 388 } 389 if (rank) { 390 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 391 } 392 } 393 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 394 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 395 PetscFunctionReturn(0); 396 } 397 398 /* 399 Local utility routine that creates a mapping from the global column 400 number to the local number in the off-diagonal part of the local 401 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 402 a slightly higher hash table cost; without it it is not scalable (each processor 403 has an order N integer array but is fast to acess. 404 */ 405 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 406 { 407 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 408 PetscErrorCode ierr; 409 PetscInt n = aij->B->cmap->n,i; 410 411 PetscFunctionBegin; 412 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 413 #if defined(PETSC_USE_CTABLE) 414 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 415 for (i=0; i<n; i++) { 416 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 417 } 418 #else 419 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 420 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 421 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 422 #endif 423 PetscFunctionReturn(0); 424 } 425 426 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 427 { \ 428 if (col <= lastcol1) low1 = 0; \ 429 else high1 = nrow1; \ 430 lastcol1 = col;\ 431 while (high1-low1 > 5) { \ 432 t = (low1+high1)/2; \ 433 if (rp1[t] > col) high1 = t; \ 434 else low1 = t; \ 435 } \ 436 for (_i=low1; _i<high1; _i++) { \ 437 if (rp1[_i] > col) break; \ 438 if (rp1[_i] == col) { \ 439 if (addv == ADD_VALUES) ap1[_i] += value; \ 440 else ap1[_i] = value; \ 441 goto a_noinsert; \ 442 } \ 443 } \ 444 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 445 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 446 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 447 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 448 N = nrow1++ - 1; a->nz++; high1++; \ 449 /* shift up all the later entries in this row */ \ 450 for (ii=N; ii>=_i; ii--) { \ 451 rp1[ii+1] = rp1[ii]; \ 452 ap1[ii+1] = ap1[ii]; \ 453 } \ 454 rp1[_i] = col; \ 455 ap1[_i] = value; \ 456 A->nonzerostate++;\ 457 a_noinsert: ; \ 458 ailen[row] = nrow1; \ 459 } 460 461 462 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 463 { \ 464 if (col <= lastcol2) low2 = 0; \ 465 else high2 = nrow2; \ 466 lastcol2 = col; \ 467 while (high2-low2 > 5) { \ 468 t = (low2+high2)/2; \ 469 if (rp2[t] > col) high2 = t; \ 470 else low2 = t; \ 471 } \ 472 for (_i=low2; _i<high2; _i++) { \ 473 if (rp2[_i] > col) break; \ 474 if (rp2[_i] == col) { \ 475 if (addv == ADD_VALUES) ap2[_i] += value; \ 476 else ap2[_i] = value; \ 477 goto b_noinsert; \ 478 } \ 479 } \ 480 if (value 
== 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 481 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 482 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 483 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 484 N = nrow2++ - 1; b->nz++; high2++; \ 485 /* shift up all the later entries in this row */ \ 486 for (ii=N; ii>=_i; ii--) { \ 487 rp2[ii+1] = rp2[ii]; \ 488 ap2[ii+1] = ap2[ii]; \ 489 } \ 490 rp2[_i] = col; \ 491 ap2[_i] = value; \ 492 B->nonzerostate++; \ 493 b_noinsert: ; \ 494 bilen[row] = nrow2; \ 495 } 496 497 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 498 { 499 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 500 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 501 PetscErrorCode ierr; 502 PetscInt l,*garray = mat->garray,diag; 503 504 PetscFunctionBegin; 505 /* code only works for square matrices A */ 506 507 /* find size of row to the left of the diagonal part */ 508 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 509 row = row - diag; 510 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 511 if (garray[b->j[b->i[row]+l]] > diag) break; 512 } 513 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 514 515 /* diagonal part */ 516 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 517 518 /* right of diagonal part */ 519 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 520 PetscFunctionReturn(0); 521 } 522 523 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 524 { 525 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 526 PetscScalar value; 527 PetscErrorCode ierr; 528 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 529 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 530 PetscBool roworiented = aij->roworiented; 531 532 /* Some Variables required in the macro */ 533 Mat A = aij->A; 534 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 535 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 536 MatScalar *aa = a->a; 537 PetscBool ignorezeroentries = a->ignorezeroentries; 538 Mat B = aij->B; 539 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 540 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 541 MatScalar *ba = b->a; 542 543 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 544 PetscInt nonew; 545 MatScalar *ap1,*ap2; 546 547 PetscFunctionBegin; 548 for (i=0; i<m; i++) { 549 if (im[i] < 0) continue; 550 #if defined(PETSC_USE_DEBUG) 551 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 552 #endif 553 if (im[i] >= rstart && im[i] < rend) { 554 row = im[i] - rstart; 555 lastcol1 = -1; 556 rp1 = aj + ai[row]; 557 ap1 = aa + ai[row]; 558 rmax1 = aimax[row]; 559 nrow1 = ailen[row]; 560 low1 = 0; 561 high1 = nrow1; 562 lastcol2 = -1; 563 rp2 = bj + bi[row]; 564 ap2 = ba + bi[row]; 565 rmax2 = bimax[row]; 566 nrow2 = bilen[row]; 567 low2 = 0; 568 high2 = nrow2; 569 570 for (j=0; j<n; j++) { 571 if (roworiented) value = v[i*n+j]; 572 else value = v[i+j*m]; 573 if (ignorezeroentries && value == 0.0 && (addv == 
ADD_VALUES)) continue; 574 if (in[j] >= cstart && in[j] < cend) { 575 col = in[j] - cstart; 576 nonew = a->nonew; 577 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 578 } else if (in[j] < 0) continue; 579 #if defined(PETSC_USE_DEBUG) 580 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 581 #endif 582 else { 583 if (mat->was_assembled) { 584 if (!aij->colmap) { 585 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 586 } 587 #if defined(PETSC_USE_CTABLE) 588 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 589 col--; 590 #else 591 col = aij->colmap[in[j]] - 1; 592 #endif 593 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 594 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 595 col = in[j]; 596 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 597 B = aij->B; 598 b = (Mat_SeqAIJ*)B->data; 599 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 600 rp2 = bj + bi[row]; 601 ap2 = ba + bi[row]; 602 rmax2 = bimax[row]; 603 nrow2 = bilen[row]; 604 low2 = 0; 605 high2 = nrow2; 606 bm = aij->B->rmap->n; 607 ba = b->a; 608 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 609 } else col = in[j]; 610 nonew = b->nonew; 611 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 612 } 613 } 614 } else { 615 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 616 if (!aij->donotstash) { 617 mat->assembled = PETSC_FALSE; 618 if (roworiented) { 619 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 620 } else { 621 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 622 } 623 } 624 } 625 } 626 PetscFunctionReturn(0); 627 } 628 629 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 630 { 631 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 632 PetscErrorCode ierr; 633 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 634 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 635 636 PetscFunctionBegin; 637 for (i=0; i<m; i++) { 638 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 639 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 640 if (idxm[i] >= rstart && idxm[i] < rend) { 641 row = idxm[i] - rstart; 642 for (j=0; j<n; j++) { 643 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 644 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 645 if (idxn[j] >= cstart && idxn[j] < cend) { 646 col = idxn[j] - cstart; 647 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 648 } else { 649 if (!aij->colmap) { 650 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 651 } 652 #if defined(PETSC_USE_CTABLE) 653 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 654 col--; 655 #else 656 col = aij->colmap[idxn[j]] - 1; 657 #endif 
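/*
   Note on the colmap convention used just above: MatCreateColmap_MPIAIJ_Private() stores the
   local off-diagonal column index shifted by +1 (and, in the PETSC_USE_CTABLE case, keys the
   table with the global column + 1, since PetscTable keys must be positive), so that a value of
   0 (the PetscTableFind() "not found" result, or an untouched entry of the calloc'd array) can
   mean "this global column has no entry in B on this process".  Hence the -1 above and the
   col < 0 test below; the additional garray[col] != idxn[j] check verifies that the mapped local
   column really corresponds to the requested global column before B is accessed.
*/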
658 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 659 else { 660 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 661 } 662 } 663 } 664 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 665 } 666 PetscFunctionReturn(0); 667 } 668 669 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 670 671 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 672 { 673 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 674 PetscErrorCode ierr; 675 PetscInt nstash,reallocs; 676 677 PetscFunctionBegin; 678 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 679 680 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 681 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 682 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 683 PetscFunctionReturn(0); 684 } 685 686 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 687 { 688 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 689 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 690 PetscErrorCode ierr; 691 PetscMPIInt n; 692 PetscInt i,j,rstart,ncols,flg; 693 PetscInt *row,*col; 694 PetscBool other_disassembled; 695 PetscScalar *val; 696 697 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 698 699 PetscFunctionBegin; 700 if (!aij->donotstash && !mat->nooffprocentries) { 701 while (1) { 702 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 703 if (!flg) break; 704 705 for (i=0; i<n; ) { 706 /* Now identify the consecutive vals belonging to the same row */ 707 for (j=i,rstart=row[j]; j<n; j++) { 708 if (row[j] != rstart) break; 709 } 710 if (j < n) ncols = j-i; 711 else ncols = n-i; 712 /* Now assemble all these values with a single function call */ 713 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 714 715 i = j; 716 } 717 } 718 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 719 } 720 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 721 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 722 723 /* determine if any processor has disassembled, if so we must 724 also disassemble ourselfs, in order that we may reassemble. 
*/ 725 /* 726 if nonzero structure of submatrix B cannot change then we know that 727 no processor disassembled thus we can skip this stuff 728 */ 729 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 730 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 731 if (mat->was_assembled && !other_disassembled) { 732 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 733 } 734 } 735 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 736 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 737 } 738 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 739 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 740 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 741 742 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 743 744 aij->rowvalues = 0; 745 746 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 747 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 748 749 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 750 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 751 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 752 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 753 } 754 PetscFunctionReturn(0); 755 } 756 757 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 758 { 759 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 760 PetscErrorCode ierr; 761 762 PetscFunctionBegin; 763 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 764 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 765 PetscFunctionReturn(0); 766 } 767 768 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 769 { 770 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 771 PetscInt *lrows; 772 PetscInt r, len; 773 PetscErrorCode ierr; 774 775 PetscFunctionBegin; 776 /* get locally owned rows */ 777 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 778 /* fix right hand side if needed */ 779 if (x && b) { 780 const PetscScalar *xx; 781 PetscScalar *bb; 782 783 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 784 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 785 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 786 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 787 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 788 } 789 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 790 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 791 if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */ 792 PetscBool cong; 793 ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr); 794 if (cong) A->congruentlayouts = 1; 795 else A->congruentlayouts = 0; 796 } 797 if ((diag != 0.0) && A->congruentlayouts) { 798 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 799 } else if (diag != 0.0) { 800 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 801 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 802 for (r = 0; r < len; ++r) { 803 const PetscInt row = lrows[r] + A->rmap->rstart; 804 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 805 } 806 
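/*
   Usage sketch for the x/b arguments handled at the top of this routine (the names nbc, bcrows,
   x, b are illustrative assumptions): to impose u_i = g_i on the rows listed in bcrows while
   keeping the linear system consistent, pass the boundary values in x and let the routine
   rewrite b:

      ierr = MatZeroRows(A,nbc,bcrows,1.0,x,b);CHKERRQ(ierr);

   afterwards each zeroed row i reads 1.0 * u_i = b_i with b_i set to 1.0 * x_i = g_i, which is
   exactly the bb[lrows[r]] = diag*xx[lrows[r]] update performed above.
*/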
ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 807 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 808 } else { 809 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 810 } 811 ierr = PetscFree(lrows);CHKERRQ(ierr); 812 813 /* only change matrix nonzero state if pattern was allowed to be changed */ 814 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 815 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 816 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 817 } 818 PetscFunctionReturn(0); 819 } 820 821 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 822 { 823 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 824 PetscErrorCode ierr; 825 PetscMPIInt n = A->rmap->n; 826 PetscInt i,j,r,m,p = 0,len = 0; 827 PetscInt *lrows,*owners = A->rmap->range; 828 PetscSFNode *rrows; 829 PetscSF sf; 830 const PetscScalar *xx; 831 PetscScalar *bb,*mask; 832 Vec xmask,lmask; 833 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 834 const PetscInt *aj, *ii,*ridx; 835 PetscScalar *aa; 836 837 PetscFunctionBegin; 838 /* Create SF where leaves are input rows and roots are owned rows */ 839 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 840 for (r = 0; r < n; ++r) lrows[r] = -1; 841 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 842 for (r = 0; r < N; ++r) { 843 const PetscInt idx = rows[r]; 844 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 845 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 846 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 847 } 848 rrows[r].rank = p; 849 rrows[r].index = rows[r] - owners[p]; 850 } 851 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 852 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 853 /* Collect flags for rows to be zeroed */ 854 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 855 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 856 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 857 /* Compress and put in row numbers */ 858 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 859 /* zero diagonal part of matrix */ 860 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 861 /* handle off diagonal part of matrix */ 862 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 863 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 864 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 865 for (i=0; i<len; i++) bb[lrows[i]] = 1; 866 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 867 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 868 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 869 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 870 if (x) { 871 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 872 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 873 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 874 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 875 } 876 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 877 /* remove zeroed rows of off diagonal matrix */ 878 ii = aij->i; 879 for (i=0; i<len; i++) { 880 ierr = PetscMemzero(aij->a + 
ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 881 } 882 /* loop over all elements of off process part of matrix zeroing removed columns*/ 883 if (aij->compressedrow.use) { 884 m = aij->compressedrow.nrows; 885 ii = aij->compressedrow.i; 886 ridx = aij->compressedrow.rindex; 887 for (i=0; i<m; i++) { 888 n = ii[i+1] - ii[i]; 889 aj = aij->j + ii[i]; 890 aa = aij->a + ii[i]; 891 892 for (j=0; j<n; j++) { 893 if (PetscAbsScalar(mask[*aj])) { 894 if (b) bb[*ridx] -= *aa*xx[*aj]; 895 *aa = 0.0; 896 } 897 aa++; 898 aj++; 899 } 900 ridx++; 901 } 902 } else { /* do not use compressed row format */ 903 m = l->B->rmap->n; 904 for (i=0; i<m; i++) { 905 n = ii[i+1] - ii[i]; 906 aj = aij->j + ii[i]; 907 aa = aij->a + ii[i]; 908 for (j=0; j<n; j++) { 909 if (PetscAbsScalar(mask[*aj])) { 910 if (b) bb[i] -= *aa*xx[*aj]; 911 *aa = 0.0; 912 } 913 aa++; 914 aj++; 915 } 916 } 917 } 918 if (x) { 919 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 920 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 921 } 922 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 923 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 924 ierr = PetscFree(lrows);CHKERRQ(ierr); 925 926 /* only change matrix nonzero state if pattern was allowed to be changed */ 927 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 928 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 929 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 930 } 931 PetscFunctionReturn(0); 932 } 933 934 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 935 { 936 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 937 PetscErrorCode ierr; 938 PetscInt nt; 939 940 PetscFunctionBegin; 941 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 942 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 943 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 944 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 945 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 946 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 947 PetscFunctionReturn(0); 948 } 949 950 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 951 { 952 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 953 PetscErrorCode ierr; 954 955 PetscFunctionBegin; 956 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 957 PetscFunctionReturn(0); 958 } 959 960 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 961 { 962 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 963 PetscErrorCode ierr; 964 965 PetscFunctionBegin; 966 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 967 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 968 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 969 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 970 PetscFunctionReturn(0); 971 } 972 973 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 974 { 975 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 976 PetscErrorCode ierr; 977 PetscBool merged; 978 979 PetscFunctionBegin; 980 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 981 /* do nondiagonal part */ 982 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 983 if (!merged) { 984 /* send it on its way */ 985 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 986 /* do local 
part */ 987 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 988 /* receive remote parts: note this assumes the values are not actually */ 989 /* added in yy until the next line, */ 990 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 991 } else { 992 /* do local part */ 993 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 994 /* send it on its way */ 995 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 996 /* values actually were received in the Begin() but we need to call this nop */ 997 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 998 } 999 PetscFunctionReturn(0); 1000 } 1001 1002 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1003 { 1004 MPI_Comm comm; 1005 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1006 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1007 IS Me,Notme; 1008 PetscErrorCode ierr; 1009 PetscInt M,N,first,last,*notme,i; 1010 PetscMPIInt size; 1011 1012 PetscFunctionBegin; 1013 /* Easy test: symmetric diagonal block */ 1014 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1015 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1016 if (!*f) PetscFunctionReturn(0); 1017 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1018 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1019 if (size == 1) PetscFunctionReturn(0); 1020 1021 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1022 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1023 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1024 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1025 for (i=0; i<first; i++) notme[i] = i; 1026 for (i=last; i<M; i++) notme[i-last+first] = i; 1027 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1028 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1029 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1030 Aoff = Aoffs[0]; 1031 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1032 Boff = Boffs[0]; 1033 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1034 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1035 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1036 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1037 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1038 ierr = PetscFree(notme);CHKERRQ(ierr); 1039 PetscFunctionReturn(0); 1040 } 1041 1042 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1043 { 1044 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1045 PetscErrorCode ierr; 1046 1047 PetscFunctionBegin; 1048 /* do nondiagonal part */ 1049 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1050 /* send it on its way */ 1051 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1052 /* do local part */ 1053 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1054 /* receive remote parts */ 1055 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1056 PetscFunctionReturn(0); 1057 } 1058 1059 /* 1060 This only works correctly for square matrices where the subblock A->A is the 1061 diagonal block 1062 */ 1063 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1064 { 1065 PetscErrorCode ierr; 1066 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1067 1068 PetscFunctionBegin; 1069 if (A->rmap->N != A->cmap->N) 
SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1070 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1071 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1072 PetscFunctionReturn(0); 1073 } 1074 1075 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1076 { 1077 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1078 PetscErrorCode ierr; 1079 1080 PetscFunctionBegin; 1081 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1082 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1083 PetscFunctionReturn(0); 1084 } 1085 1086 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1087 { 1088 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1089 PetscErrorCode ierr; 1090 1091 PetscFunctionBegin; 1092 #if defined(PETSC_USE_LOG) 1093 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1094 #endif 1095 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1096 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1097 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1098 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1099 #if defined(PETSC_USE_CTABLE) 1100 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1101 #else 1102 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1103 #endif 1104 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1105 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1106 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1107 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1108 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1109 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1110 1111 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1112 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1113 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1114 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1115 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1116 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1117 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1118 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1119 #if defined(PETSC_HAVE_ELEMENTAL) 1120 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1121 #endif 1122 #if defined(PETSC_HAVE_HYPRE) 1123 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1124 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1125 #endif 1126 PetscFunctionReturn(0); 1127 } 1128 1129 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1130 { 1131 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1132 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1133 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1134 PetscErrorCode ierr; 1135 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1136 int fd; 1137 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1138 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1139 PetscScalar *column_values; 1140 PetscInt message_count,flowcontrolcount; 1141 FILE *file; 1142 1143 PetscFunctionBegin; 1144 ierr = 
MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1145 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1146 nz = A->nz + B->nz; 1147 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1148 if (!rank) { 1149 header[0] = MAT_FILE_CLASSID; 1150 header[1] = mat->rmap->N; 1151 header[2] = mat->cmap->N; 1152 1153 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1154 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1155 /* get largest number of rows any processor has */ 1156 rlen = mat->rmap->n; 1157 range = mat->rmap->range; 1158 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1159 } else { 1160 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1161 rlen = mat->rmap->n; 1162 } 1163 1164 /* load up the local row counts */ 1165 ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr); 1166 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1167 1168 /* store the row lengths to the file */ 1169 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1170 if (!rank) { 1171 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1172 for (i=1; i<size; i++) { 1173 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1174 rlen = range[i+1] - range[i]; 1175 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1176 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1177 } 1178 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1179 } else { 1180 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1181 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1182 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1183 } 1184 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1185 1186 /* load up the local column indices */ 1187 nzmax = nz; /* th processor needs space a largest processor needs */ 1188 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1189 ierr = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr); 1190 cnt = 0; 1191 for (i=0; i<mat->rmap->n; i++) { 1192 for (j=B->i[i]; j<B->i[i+1]; j++) { 1193 if ((col = garray[B->j[j]]) > cstart) break; 1194 column_indices[cnt++] = col; 1195 } 1196 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1197 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1198 } 1199 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1200 1201 /* store the column indices to the file */ 1202 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1203 if (!rank) { 1204 MPI_Status status; 1205 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1206 for (i=1; i<size; i++) { 1207 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1208 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1209 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1210 ierr 
= MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1211 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1212 } 1213 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1214 } else { 1215 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1216 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1217 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1218 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1219 } 1220 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1221 1222 /* load up the local column values */ 1223 ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr); 1224 cnt = 0; 1225 for (i=0; i<mat->rmap->n; i++) { 1226 for (j=B->i[i]; j<B->i[i+1]; j++) { 1227 if (garray[B->j[j]] > cstart) break; 1228 column_values[cnt++] = B->a[j]; 1229 } 1230 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1231 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1232 } 1233 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1234 1235 /* store the column values to the file */ 1236 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1237 if (!rank) { 1238 MPI_Status status; 1239 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1240 for (i=1; i<size; i++) { 1241 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1242 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1243 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1244 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1245 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1246 } 1247 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1248 } else { 1249 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1250 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1251 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1252 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1253 } 1254 ierr = PetscFree(column_values);CHKERRQ(ierr); 1255 1256 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1257 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1258 PetscFunctionReturn(0); 1259 } 1260 1261 #include <petscdraw.h> 1262 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1263 { 1264 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1265 PetscErrorCode ierr; 1266 PetscMPIInt rank = aij->rank,size = aij->size; 1267 PetscBool isdraw,iascii,isbinary; 1268 PetscViewer sviewer; 1269 PetscViewerFormat format; 1270 1271 PetscFunctionBegin; 1272 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1273 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1274 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1275 if (iascii) { 1276 ierr = 
PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1277 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1278 MatInfo info; 1279 PetscBool inodes; 1280 1281 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1282 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1283 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1284 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1285 if (!inodes) { 1286 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1287 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1288 } else { 1289 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1290 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1291 } 1292 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1293 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1294 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1295 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1296 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1297 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1298 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1299 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1300 PetscFunctionReturn(0); 1301 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1302 PetscInt inodecount,inodelimit,*inodes; 1303 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1304 if (inodes) { 1305 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1306 } else { 1307 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1308 } 1309 PetscFunctionReturn(0); 1310 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1311 PetscFunctionReturn(0); 1312 } 1313 } else if (isbinary) { 1314 if (size == 1) { 1315 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1316 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1317 } else { 1318 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1319 } 1320 PetscFunctionReturn(0); 1321 } else if (isdraw) { 1322 PetscDraw draw; 1323 PetscBool isnull; 1324 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1325 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1326 if (isnull) PetscFunctionReturn(0); 1327 } 1328 1329 { 1330 /* assemble the entire matrix onto first processor. 
*/ 1331 Mat A; 1332 Mat_SeqAIJ *Aloc; 1333 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1334 MatScalar *a; 1335 1336 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1337 if (!rank) { 1338 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1339 } else { 1340 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1341 } 1342 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1343 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1344 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1345 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1346 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1347 1348 /* copy over the A part */ 1349 Aloc = (Mat_SeqAIJ*)aij->A->data; 1350 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1351 row = mat->rmap->rstart; 1352 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1353 for (i=0; i<m; i++) { 1354 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1355 row++; 1356 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1357 } 1358 aj = Aloc->j; 1359 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1360 1361 /* copy over the B part */ 1362 Aloc = (Mat_SeqAIJ*)aij->B->data; 1363 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1364 row = mat->rmap->rstart; 1365 ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr); 1366 ct = cols; 1367 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1368 for (i=0; i<m; i++) { 1369 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1370 row++; 1371 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1372 } 1373 ierr = PetscFree(ct);CHKERRQ(ierr); 1374 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1375 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1376 /* 1377 Everyone has to call to draw the matrix since the graphics waits are 1378 synchronized across all processors that share the PetscDraw object 1379 */ 1380 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1381 if (!rank) { 1382 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1383 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1384 } 1385 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1386 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1387 ierr = MatDestroy(&A);CHKERRQ(ierr); 1388 } 1389 PetscFunctionReturn(0); 1390 } 1391 1392 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1393 { 1394 PetscErrorCode ierr; 1395 PetscBool iascii,isdraw,issocket,isbinary; 1396 1397 PetscFunctionBegin; 1398 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1399 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1400 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1401 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1402 if (iascii || isdraw || isbinary || issocket) { 1403 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1404 } 1405 PetscFunctionReturn(0); 1406 } 1407 1408 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1409 { 1410 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1411 PetscErrorCode ierr; 1412 Vec bb1 = 0; 1413 PetscBool hasop; 1414 
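/*
   Structure of the local SOR variants implemented below: apart from an optional first sweep with
   a zero initial guess, each outer iteration scatters the current x into lvec (the ghost values
   needed by the off-diagonal block), corrects the right-hand side to bb1 = bb - B*lvec so that
   off-process coupling uses the most recently available values, and then applies the sequential
   SOR of the diagonal block A to bb1.  The iteration is therefore block Jacobi between processes
   with SOR inside each process; a genuinely parallel SOR is not supported, as the final
   SETERRQ() below states.
*/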
1415 PetscFunctionBegin; 1416 if (flag == SOR_APPLY_UPPER) { 1417 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1418 PetscFunctionReturn(0); 1419 } 1420 1421 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1422 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1423 } 1424 1425 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1426 if (flag & SOR_ZERO_INITIAL_GUESS) { 1427 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1428 its--; 1429 } 1430 1431 while (its--) { 1432 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1433 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1434 1435 /* update rhs: bb1 = bb - B*x */ 1436 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1437 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1438 1439 /* local sweep */ 1440 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1441 } 1442 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1443 if (flag & SOR_ZERO_INITIAL_GUESS) { 1444 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1445 its--; 1446 } 1447 while (its--) { 1448 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1449 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1450 1451 /* update rhs: bb1 = bb - B*x */ 1452 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1453 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1454 1455 /* local sweep */ 1456 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1457 } 1458 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1459 if (flag & SOR_ZERO_INITIAL_GUESS) { 1460 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1461 its--; 1462 } 1463 while (its--) { 1464 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1465 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1466 1467 /* update rhs: bb1 = bb - B*x */ 1468 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1469 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1470 1471 /* local sweep */ 1472 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1473 } 1474 } else if (flag & SOR_EISENSTAT) { 1475 Vec xx1; 1476 1477 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1478 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1479 1480 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1481 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1482 if (!mat->diag) { 1483 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1484 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1485 } 1486 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1487 if (hasop) { 1488 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1489 } else { 1490 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1491 } 1492 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1493 1494 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1495 1496 /* local sweep */ 1497 ierr = 
(*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1498 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1499 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1500 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1501 1502 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1503 1504 matin->factorerrortype = mat->A->factorerrortype; 1505 PetscFunctionReturn(0); 1506 } 1507 1508 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1509 { 1510 Mat aA,aB,Aperm; 1511 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1512 PetscScalar *aa,*ba; 1513 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1514 PetscSF rowsf,sf; 1515 IS parcolp = NULL; 1516 PetscBool done; 1517 PetscErrorCode ierr; 1518 1519 PetscFunctionBegin; 1520 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1521 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1522 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1523 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1524 1525 /* Invert row permutation to find out where my rows should go */ 1526 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1527 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1528 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1529 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1530 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1531 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1532 1533 /* Invert column permutation to find out where my columns should go */ 1534 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1535 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1536 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1537 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1538 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1539 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1540 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1541 1542 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1543 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1544 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1545 1546 /* Find out where my gcols should go */ 1547 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1548 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1549 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1550 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1551 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1552 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1553 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1554 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1555 1556 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1557 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1558 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1559 for (i=0; i<m; i++) { 1560 PetscInt row = rdest[i],rowner; 1561 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1562 for (j=ai[i]; j<ai[i+1]; j++) { 1563 PetscInt cowner,col = cdest[aj[j]]; 1564 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to 
eliminate this search */ 1565 if (rowner == cowner) dnnz[i]++; 1566 else onnz[i]++; 1567 } 1568 for (j=bi[i]; j<bi[i+1]; j++) { 1569 PetscInt cowner,col = gcdest[bj[j]]; 1570 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1571 if (rowner == cowner) dnnz[i]++; 1572 else onnz[i]++; 1573 } 1574 } 1575 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1576 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1577 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1578 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1579 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1580 1581 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1582 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1583 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1584 for (i=0; i<m; i++) { 1585 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1586 PetscInt j0,rowlen; 1587 rowlen = ai[i+1] - ai[i]; 1588 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1589 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1590 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1591 } 1592 rowlen = bi[i+1] - bi[i]; 1593 for (j0=j=0; j<rowlen; j0=j) { 1594 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1595 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1596 } 1597 } 1598 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1599 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1600 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1601 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1602 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1603 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1604 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1605 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1606 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1607 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1608 *B = Aperm; 1609 PetscFunctionReturn(0); 1610 } 1611 1612 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1613 { 1614 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1615 PetscErrorCode ierr; 1616 1617 PetscFunctionBegin; 1618 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1619 if (ghosts) *ghosts = aij->garray; 1620 PetscFunctionReturn(0); 1621 } 1622 1623 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1624 { 1625 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1626 Mat A = mat->A,B = mat->B; 1627 PetscErrorCode ierr; 1628 PetscReal isend[5],irecv[5]; 1629 1630 PetscFunctionBegin; 1631 info->block_size = 1.0; 1632 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1633 1634 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1635 isend[3] = info->memory; isend[4] = info->mallocs; 1636 1637 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1638 1639 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1640 isend[3] += info->memory; isend[4] += info->mallocs; 1641 if (flag == MAT_LOCAL) { 1642 info->nz_used = isend[0]; 1643 info->nz_allocated = isend[1]; 1644 info->nz_unneeded = isend[2]; 1645 info->memory = isend[3]; 1646 info->mallocs = 
isend[4]; 1647 } else if (flag == MAT_GLOBAL_MAX) { 1648 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1649 1650 info->nz_used = irecv[0]; 1651 info->nz_allocated = irecv[1]; 1652 info->nz_unneeded = irecv[2]; 1653 info->memory = irecv[3]; 1654 info->mallocs = irecv[4]; 1655 } else if (flag == MAT_GLOBAL_SUM) { 1656 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1657 1658 info->nz_used = irecv[0]; 1659 info->nz_allocated = irecv[1]; 1660 info->nz_unneeded = irecv[2]; 1661 info->memory = irecv[3]; 1662 info->mallocs = irecv[4]; 1663 } 1664 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1665 info->fill_ratio_needed = 0; 1666 info->factor_mallocs = 0; 1667 PetscFunctionReturn(0); 1668 } 1669 1670 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1671 { 1672 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1673 PetscErrorCode ierr; 1674 1675 PetscFunctionBegin; 1676 switch (op) { 1677 case MAT_NEW_NONZERO_LOCATIONS: 1678 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1679 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1680 case MAT_KEEP_NONZERO_PATTERN: 1681 case MAT_NEW_NONZERO_LOCATION_ERR: 1682 case MAT_USE_INODES: 1683 case MAT_IGNORE_ZERO_ENTRIES: 1684 MatCheckPreallocated(A,1); 1685 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1686 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1687 break; 1688 case MAT_ROW_ORIENTED: 1689 MatCheckPreallocated(A,1); 1690 a->roworiented = flg; 1691 1692 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1693 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1694 break; 1695 case MAT_NEW_DIAGONALS: 1696 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1697 break; 1698 case MAT_IGNORE_OFF_PROC_ENTRIES: 1699 a->donotstash = flg; 1700 break; 1701 case MAT_SPD: 1702 A->spd_set = PETSC_TRUE; 1703 A->spd = flg; 1704 if (flg) { 1705 A->symmetric = PETSC_TRUE; 1706 A->structurally_symmetric = PETSC_TRUE; 1707 A->symmetric_set = PETSC_TRUE; 1708 A->structurally_symmetric_set = PETSC_TRUE; 1709 } 1710 break; 1711 case MAT_SYMMETRIC: 1712 MatCheckPreallocated(A,1); 1713 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1714 break; 1715 case MAT_STRUCTURALLY_SYMMETRIC: 1716 MatCheckPreallocated(A,1); 1717 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1718 break; 1719 case MAT_HERMITIAN: 1720 MatCheckPreallocated(A,1); 1721 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1722 break; 1723 case MAT_SYMMETRY_ETERNAL: 1724 MatCheckPreallocated(A,1); 1725 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1726 break; 1727 case MAT_SUBMAT_SINGLEIS: 1728 A->submat_singleis = flg; 1729 break; 1730 default: 1731 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1732 } 1733 PetscFunctionReturn(0); 1734 } 1735 1736 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1737 { 1738 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1739 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1740 PetscErrorCode ierr; 1741 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1742 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1743 PetscInt *cmap,*idx_p; 1744 1745 PetscFunctionBegin; 1746 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1747 mat->getrowactive = PETSC_TRUE; 1748 1749 if (!mat->rowvalues && (idx || v)) { 1750 /* 1751 allocate enough space to hold information from the longest row. 
1752 */ 1753 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1754 PetscInt max = 1,tmp; 1755 for (i=0; i<matin->rmap->n; i++) { 1756 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1757 if (max < tmp) max = tmp; 1758 } 1759 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1760 } 1761 1762 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1763 lrow = row - rstart; 1764 1765 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1766 if (!v) {pvA = 0; pvB = 0;} 1767 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1768 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1769 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1770 nztot = nzA + nzB; 1771 1772 cmap = mat->garray; 1773 if (v || idx) { 1774 if (nztot) { 1775 /* Sort by increasing column numbers, assuming A and B already sorted */ 1776 PetscInt imark = -1; 1777 if (v) { 1778 *v = v_p = mat->rowvalues; 1779 for (i=0; i<nzB; i++) { 1780 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1781 else break; 1782 } 1783 imark = i; 1784 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1785 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1786 } 1787 if (idx) { 1788 *idx = idx_p = mat->rowindices; 1789 if (imark > -1) { 1790 for (i=0; i<imark; i++) { 1791 idx_p[i] = cmap[cworkB[i]]; 1792 } 1793 } else { 1794 for (i=0; i<nzB; i++) { 1795 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1796 else break; 1797 } 1798 imark = i; 1799 } 1800 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1801 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1802 } 1803 } else { 1804 if (idx) *idx = 0; 1805 if (v) *v = 0; 1806 } 1807 } 1808 *nz = nztot; 1809 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1810 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1811 PetscFunctionReturn(0); 1812 } 1813 1814 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1815 { 1816 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1817 1818 PetscFunctionBegin; 1819 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1820 aij->getrowactive = PETSC_FALSE; 1821 PetscFunctionReturn(0); 1822 } 1823 1824 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1825 { 1826 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1827 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1828 PetscErrorCode ierr; 1829 PetscInt i,j,cstart = mat->cmap->rstart; 1830 PetscReal sum = 0.0; 1831 MatScalar *v; 1832 1833 PetscFunctionBegin; 1834 if (aij->size == 1) { 1835 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1836 } else { 1837 if (type == NORM_FROBENIUS) { 1838 v = amat->a; 1839 for (i=0; i<amat->nz; i++) { 1840 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1841 } 1842 v = bmat->a; 1843 for (i=0; i<bmat->nz; i++) { 1844 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1845 } 1846 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1847 *norm = PetscSqrtReal(*norm); 1848 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1849 } else if (type == NORM_1) { /* max column norm */ 1850 PetscReal *tmp,*tmp2; 1851 PetscInt *jj,*garray = aij->garray; 1852 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1853 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1854 *norm = 0.0; 
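      /*
         NORM_1 is the maximum column sum, ||A||_1 = max_j sum_i |a_ij|.
         Each process accumulates |a_ij| over its local rows into a work
         array of global length N (diagonal-block columns are shifted by
         cstart, off-diagonal columns are mapped through garray), the
         partial column sums are added across all processes, and the
         largest entry is taken as the norm.
      */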
1855 v = amat->a; jj = amat->j; 1856 for (j=0; j<amat->nz; j++) { 1857 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1858 } 1859 v = bmat->a; jj = bmat->j; 1860 for (j=0; j<bmat->nz; j++) { 1861 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1862 } 1863 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1864 for (j=0; j<mat->cmap->N; j++) { 1865 if (tmp2[j] > *norm) *norm = tmp2[j]; 1866 } 1867 ierr = PetscFree(tmp);CHKERRQ(ierr); 1868 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1869 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1870 } else if (type == NORM_INFINITY) { /* max row norm */ 1871 PetscReal ntemp = 0.0; 1872 for (j=0; j<aij->A->rmap->n; j++) { 1873 v = amat->a + amat->i[j]; 1874 sum = 0.0; 1875 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1876 sum += PetscAbsScalar(*v); v++; 1877 } 1878 v = bmat->a + bmat->i[j]; 1879 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1880 sum += PetscAbsScalar(*v); v++; 1881 } 1882 if (sum > ntemp) ntemp = sum; 1883 } 1884 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1885 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1886 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1887 } 1888 PetscFunctionReturn(0); 1889 } 1890 1891 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1892 { 1893 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1894 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1895 PetscErrorCode ierr; 1896 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1897 PetscInt cstart = A->cmap->rstart,ncol; 1898 Mat B; 1899 MatScalar *array; 1900 1901 PetscFunctionBegin; 1902 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1903 1904 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1905 ai = Aloc->i; aj = Aloc->j; 1906 bi = Bloc->i; bj = Bloc->j; 1907 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1908 PetscInt *d_nnz,*g_nnz,*o_nnz; 1909 PetscSFNode *oloc; 1910 PETSC_UNUSED PetscSF sf; 1911 1912 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1913 /* compute d_nnz for preallocation */ 1914 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1915 for (i=0; i<ai[ma]; i++) { 1916 d_nnz[aj[i]]++; 1917 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1918 } 1919 /* compute local off-diagonal contributions */ 1920 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1921 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1922 /* map those to global */ 1923 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1924 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1925 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1926 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1927 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1928 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1929 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1930 1931 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1932 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1933 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1934 ierr = 
MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1935 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1936 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1937 } else { 1938 B = *matout; 1939 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1940 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1941 } 1942 1943 /* copy over the A part */ 1944 array = Aloc->a; 1945 row = A->rmap->rstart; 1946 for (i=0; i<ma; i++) { 1947 ncol = ai[i+1]-ai[i]; 1948 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1949 row++; 1950 array += ncol; aj += ncol; 1951 } 1952 aj = Aloc->j; 1953 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1954 1955 /* copy over the B part */ 1956 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 1957 array = Bloc->a; 1958 row = A->rmap->rstart; 1959 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1960 cols_tmp = cols; 1961 for (i=0; i<mb; i++) { 1962 ncol = bi[i+1]-bi[i]; 1963 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1964 row++; 1965 array += ncol; cols_tmp += ncol; 1966 } 1967 ierr = PetscFree(cols);CHKERRQ(ierr); 1968 1969 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1970 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1971 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1972 *matout = B; 1973 } else { 1974 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 1975 } 1976 PetscFunctionReturn(0); 1977 } 1978 1979 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1980 { 1981 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1982 Mat a = aij->A,b = aij->B; 1983 PetscErrorCode ierr; 1984 PetscInt s1,s2,s3; 1985 1986 PetscFunctionBegin; 1987 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1988 if (rr) { 1989 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1990 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1991 /* Overlap communication with computation. 
*/ 1992 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1993 } 1994 if (ll) { 1995 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1996 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1997 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1998 } 1999 /* scale the diagonal block */ 2000 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2001 2002 if (rr) { 2003 /* Do a scatter end and then right scale the off-diagonal block */ 2004 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2005 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2006 } 2007 PetscFunctionReturn(0); 2008 } 2009 2010 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2011 { 2012 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2013 PetscErrorCode ierr; 2014 2015 PetscFunctionBegin; 2016 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2017 PetscFunctionReturn(0); 2018 } 2019 2020 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2021 { 2022 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2023 Mat a,b,c,d; 2024 PetscBool flg; 2025 PetscErrorCode ierr; 2026 2027 PetscFunctionBegin; 2028 a = matA->A; b = matA->B; 2029 c = matB->A; d = matB->B; 2030 2031 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2032 if (flg) { 2033 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2034 } 2035 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2036 PetscFunctionReturn(0); 2037 } 2038 2039 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2040 { 2041 PetscErrorCode ierr; 2042 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2043 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2044 2045 PetscFunctionBegin; 2046 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2047 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2048 /* because of the column compression in the off-processor part of the matrix a->B, 2049 the number of columns in a->B and b->B may be different, hence we cannot call 2050 the MatCopy() directly on the two parts. If need be, we can provide a more 2051 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2052 then copying the submatrices */ 2053 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2054 } else { 2055 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2056 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2057 } 2058 PetscFunctionReturn(0); 2059 } 2060 2061 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2062 { 2063 PetscErrorCode ierr; 2064 2065 PetscFunctionBegin; 2066 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2067 PetscFunctionReturn(0); 2068 } 2069 2070 /* 2071 Computes the number of nonzeros per row needed for preallocation when X and Y 2072 have different nonzero structure. 
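   The counts are obtained by merging, per row, the (sorted) global
   column indices of X and Y: nnz[i] is the size of the union of the two
   index sets.  For example, if row i of X has global columns {1,4,7}
   and row i of Y has {4,5}, the union is {1,4,5,7} and nnz[i] = 4.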
2073 */ 2074 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2075 { 2076 PetscInt i,j,k,nzx,nzy; 2077 2078 PetscFunctionBegin; 2079 /* Set the number of nonzeros in the new matrix */ 2080 for (i=0; i<m; i++) { 2081 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2082 nzx = xi[i+1] - xi[i]; 2083 nzy = yi[i+1] - yi[i]; 2084 nnz[i] = 0; 2085 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2086 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2087 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2088 nnz[i]++; 2089 } 2090 for (; k<nzy; k++) nnz[i]++; 2091 } 2092 PetscFunctionReturn(0); 2093 } 2094 2095 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2096 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2097 { 2098 PetscErrorCode ierr; 2099 PetscInt m = Y->rmap->N; 2100 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2101 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2102 2103 PetscFunctionBegin; 2104 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2105 PetscFunctionReturn(0); 2106 } 2107 2108 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2109 { 2110 PetscErrorCode ierr; 2111 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2112 PetscBLASInt bnz,one=1; 2113 Mat_SeqAIJ *x,*y; 2114 2115 PetscFunctionBegin; 2116 if (str == SAME_NONZERO_PATTERN) { 2117 PetscScalar alpha = a; 2118 x = (Mat_SeqAIJ*)xx->A->data; 2119 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2120 y = (Mat_SeqAIJ*)yy->A->data; 2121 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2122 x = (Mat_SeqAIJ*)xx->B->data; 2123 y = (Mat_SeqAIJ*)yy->B->data; 2124 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2125 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2126 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2127 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2128 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2129 } else { 2130 Mat B; 2131 PetscInt *nnz_d,*nnz_o; 2132 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2133 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2134 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2135 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2136 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2137 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2138 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2139 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2140 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2141 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2142 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2143 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2144 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2145 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2146 } 2147 PetscFunctionReturn(0); 2148 } 2149 2150 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2151 2152 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2153 { 2154 #if defined(PETSC_USE_COMPLEX) 2155 PetscErrorCode ierr; 2156 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ*)mat->data; 2157 2158 PetscFunctionBegin; 2159 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2160 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2161 #else 2162 PetscFunctionBegin; 2163 #endif 2164 PetscFunctionReturn(0); 2165 } 2166 2167 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2168 { 2169 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2170 PetscErrorCode ierr; 2171 2172 PetscFunctionBegin; 2173 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2174 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2175 PetscFunctionReturn(0); 2176 } 2177 2178 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2179 { 2180 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2181 PetscErrorCode ierr; 2182 2183 PetscFunctionBegin; 2184 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2185 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2186 PetscFunctionReturn(0); 2187 } 2188 2189 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2190 { 2191 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2192 PetscErrorCode ierr; 2193 PetscInt i,*idxb = 0; 2194 PetscScalar *va,*vb; 2195 Vec vtmp; 2196 2197 PetscFunctionBegin; 2198 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2199 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2200 if (idx) { 2201 for (i=0; i<A->rmap->n; i++) { 2202 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2203 } 2204 } 2205 2206 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2207 if (idx) { 2208 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2209 } 2210 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2211 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2212 2213 for (i=0; i<A->rmap->n; i++) { 2214 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2215 va[i] = vb[i]; 2216 if (idx) idx[i] = a->garray[idxb[i]]; 2217 } 2218 } 2219 2220 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2221 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2222 ierr = PetscFree(idxb);CHKERRQ(ierr); 2223 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2224 PetscFunctionReturn(0); 2225 } 2226 2227 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2228 { 2229 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2230 PetscErrorCode ierr; 2231 PetscInt i,*idxb = 0; 2232 PetscScalar *va,*vb; 2233 Vec vtmp; 2234 2235 PetscFunctionBegin; 2236 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2237 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2238 if (idx) { 2239 for (i=0; i<A->cmap->n; i++) { 2240 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2241 } 2242 } 2243 2244 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2245 if (idx) { 2246 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2247 } 2248 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2249 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2250 2251 for (i=0; i<A->rmap->n; i++) { 2252 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2253 va[i] = vb[i]; 2254 if (idx) idx[i] = a->garray[idxb[i]]; 2255 } 2256 } 2257 2258 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2259 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2260 ierr = PetscFree(idxb);CHKERRQ(ierr); 2261 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2262 PetscFunctionReturn(0); 2263 } 2264 2265 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2266 { 2267 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2268 PetscInt n = A->rmap->n; 2269 PetscInt cstart = A->cmap->rstart; 2270 PetscInt *cmap = mat->garray; 2271 PetscInt *diagIdx, *offdiagIdx; 2272 Vec diagV, offdiagV; 2273 PetscScalar *a, *diagA, *offdiagA; 2274 PetscInt r; 2275 PetscErrorCode ierr; 2276 2277 PetscFunctionBegin; 2278 
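  /*
     The row minimum is assembled from the two sequential blocks: a
     candidate value and local column index are computed independently
     for the diagonal block A and the off-diagonal block B, and for each
     row the candidate of smaller magnitude (ties going to the diagonal
     block) is kept.  Diagonal-block indices are shifted by cstart and
     off-diagonal indices are translated through garray to obtain global
     column numbers.
  */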
ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2279 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2280 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2281 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2282 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2283 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2284 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2285 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2286 for (r = 0; r < n; ++r) { 2287 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2288 a[r] = diagA[r]; 2289 idx[r] = cstart + diagIdx[r]; 2290 } else { 2291 a[r] = offdiagA[r]; 2292 idx[r] = cmap[offdiagIdx[r]]; 2293 } 2294 } 2295 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2296 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2297 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2298 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2299 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2300 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2301 PetscFunctionReturn(0); 2302 } 2303 2304 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2305 { 2306 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2307 PetscInt n = A->rmap->n; 2308 PetscInt cstart = A->cmap->rstart; 2309 PetscInt *cmap = mat->garray; 2310 PetscInt *diagIdx, *offdiagIdx; 2311 Vec diagV, offdiagV; 2312 PetscScalar *a, *diagA, *offdiagA; 2313 PetscInt r; 2314 PetscErrorCode ierr; 2315 2316 PetscFunctionBegin; 2317 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2318 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2319 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2320 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2321 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2322 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2323 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2324 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2325 for (r = 0; r < n; ++r) { 2326 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2327 a[r] = diagA[r]; 2328 idx[r] = cstart + diagIdx[r]; 2329 } else { 2330 a[r] = offdiagA[r]; 2331 idx[r] = cmap[offdiagIdx[r]]; 2332 } 2333 } 2334 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2335 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2336 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2337 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2338 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2339 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2340 PetscFunctionReturn(0); 2341 } 2342 2343 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2344 { 2345 PetscErrorCode ierr; 2346 Mat *dummy; 2347 2348 PetscFunctionBegin; 2349 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2350 *newmat = *dummy; 2351 ierr = PetscFree(dummy);CHKERRQ(ierr); 2352 PetscFunctionReturn(0); 2353 } 2354 2355 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2356 { 2357 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2358 PetscErrorCode ierr; 2359 2360 PetscFunctionBegin; 2361 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2362 A->factorerrortype = a->A->factorerrortype; 2363 PetscFunctionReturn(0); 2364 } 2365 2366 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2367 { 2368 PetscErrorCode ierr; 2369 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2370 2371 PetscFunctionBegin; 2372 ierr = 
MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2373 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2374 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2375 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2376 PetscFunctionReturn(0); 2377 } 2378 2379 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2380 { 2381 PetscFunctionBegin; 2382 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2383 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2384 PetscFunctionReturn(0); 2385 } 2386 2387 /*@ 2388 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2389 2390 Collective on Mat 2391 2392 Input Parameters: 2393 + A - the matrix 2394 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2395 2396 Level: advanced 2397 2398 @*/ 2399 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2400 { 2401 PetscErrorCode ierr; 2402 2403 PetscFunctionBegin; 2404 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2405 PetscFunctionReturn(0); 2406 } 2407 2408 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2409 { 2410 PetscErrorCode ierr; 2411 PetscBool sc = PETSC_FALSE,flg; 2412 2413 PetscFunctionBegin; 2414 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2415 ierr = PetscObjectOptionsBegin((PetscObject)A); 2416 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2417 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2418 if (flg) { 2419 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2420 } 2421 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2422 PetscFunctionReturn(0); 2423 } 2424 2425 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2426 { 2427 PetscErrorCode ierr; 2428 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2429 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2430 2431 PetscFunctionBegin; 2432 if (!Y->preallocated) { 2433 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2434 } else if (!aij->nz) { 2435 PetscInt nonew = aij->nonew; 2436 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2437 aij->nonew = nonew; 2438 } 2439 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2440 PetscFunctionReturn(0); 2441 } 2442 2443 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2444 { 2445 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2446 PetscErrorCode ierr; 2447 2448 PetscFunctionBegin; 2449 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2450 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2451 if (d) { 2452 PetscInt rstart; 2453 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2454 *d += rstart; 2455 2456 } 2457 PetscFunctionReturn(0); 2458 } 2459 2460 2461 /* -------------------------------------------------------------------*/ 2462 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2463 MatGetRow_MPIAIJ, 2464 MatRestoreRow_MPIAIJ, 2465 MatMult_MPIAIJ, 2466 /* 4*/ MatMultAdd_MPIAIJ, 2467 MatMultTranspose_MPIAIJ, 2468 MatMultTransposeAdd_MPIAIJ, 2469 0, 2470 0, 2471 0, 2472 /*10*/ 0, 2473 0, 2474 0, 2475 MatSOR_MPIAIJ, 2476 MatTranspose_MPIAIJ, 2477 /*15*/ MatGetInfo_MPIAIJ, 2478 MatEqual_MPIAIJ, 2479 
MatGetDiagonal_MPIAIJ, 2480 MatDiagonalScale_MPIAIJ, 2481 MatNorm_MPIAIJ, 2482 /*20*/ MatAssemblyBegin_MPIAIJ, 2483 MatAssemblyEnd_MPIAIJ, 2484 MatSetOption_MPIAIJ, 2485 MatZeroEntries_MPIAIJ, 2486 /*24*/ MatZeroRows_MPIAIJ, 2487 0, 2488 0, 2489 0, 2490 0, 2491 /*29*/ MatSetUp_MPIAIJ, 2492 0, 2493 0, 2494 MatGetDiagonalBlock_MPIAIJ, 2495 0, 2496 /*34*/ MatDuplicate_MPIAIJ, 2497 0, 2498 0, 2499 0, 2500 0, 2501 /*39*/ MatAXPY_MPIAIJ, 2502 MatGetSubMatrices_MPIAIJ, 2503 MatIncreaseOverlap_MPIAIJ, 2504 MatGetValues_MPIAIJ, 2505 MatCopy_MPIAIJ, 2506 /*44*/ MatGetRowMax_MPIAIJ, 2507 MatScale_MPIAIJ, 2508 MatShift_MPIAIJ, 2509 MatDiagonalSet_MPIAIJ, 2510 MatZeroRowsColumns_MPIAIJ, 2511 /*49*/ MatSetRandom_MPIAIJ, 2512 0, 2513 0, 2514 0, 2515 0, 2516 /*54*/ MatFDColoringCreate_MPIXAIJ, 2517 0, 2518 MatSetUnfactored_MPIAIJ, 2519 MatPermute_MPIAIJ, 2520 0, 2521 /*59*/ MatGetSubMatrix_MPIAIJ, 2522 MatDestroy_MPIAIJ, 2523 MatView_MPIAIJ, 2524 0, 2525 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2526 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2527 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2528 0, 2529 0, 2530 0, 2531 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2532 MatGetRowMinAbs_MPIAIJ, 2533 0, 2534 0, 2535 0, 2536 0, 2537 /*75*/ MatFDColoringApply_AIJ, 2538 MatSetFromOptions_MPIAIJ, 2539 0, 2540 0, 2541 MatFindZeroDiagonals_MPIAIJ, 2542 /*80*/ 0, 2543 0, 2544 0, 2545 /*83*/ MatLoad_MPIAIJ, 2546 0, 2547 0, 2548 0, 2549 0, 2550 0, 2551 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2552 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2553 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2554 MatPtAP_MPIAIJ_MPIAIJ, 2555 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2556 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2557 0, 2558 0, 2559 0, 2560 0, 2561 /*99*/ 0, 2562 0, 2563 0, 2564 MatConjugate_MPIAIJ, 2565 0, 2566 /*104*/MatSetValuesRow_MPIAIJ, 2567 MatRealPart_MPIAIJ, 2568 MatImaginaryPart_MPIAIJ, 2569 0, 2570 0, 2571 /*109*/0, 2572 0, 2573 MatGetRowMin_MPIAIJ, 2574 0, 2575 MatMissingDiagonal_MPIAIJ, 2576 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2577 0, 2578 MatGetGhosts_MPIAIJ, 2579 0, 2580 0, 2581 /*119*/0, 2582 0, 2583 0, 2584 0, 2585 MatGetMultiProcBlock_MPIAIJ, 2586 /*124*/MatFindNonzeroRows_MPIAIJ, 2587 MatGetColumnNorms_MPIAIJ, 2588 MatInvertBlockDiagonal_MPIAIJ, 2589 0, 2590 MatGetSubMatricesMPI_MPIAIJ, 2591 /*129*/0, 2592 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2593 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2594 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2595 0, 2596 /*134*/0, 2597 0, 2598 0, 2599 0, 2600 0, 2601 /*139*/MatSetBlockSizes_MPIAIJ, 2602 0, 2603 0, 2604 MatFDColoringSetUp_MPIXAIJ, 2605 MatFindOffBlockDiagonalEntries_MPIAIJ, 2606 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2607 }; 2608 2609 /* ----------------------------------------------------------------------------------------*/ 2610 2611 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2612 { 2613 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2614 PetscErrorCode ierr; 2615 2616 PetscFunctionBegin; 2617 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2618 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2619 PetscFunctionReturn(0); 2620 } 2621 2622 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2623 { 2624 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2625 PetscErrorCode ierr; 2626 2627 PetscFunctionBegin; 2628 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2629 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2630 PetscFunctionReturn(0); 2631 } 2632 2633 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2634 { 2635 Mat_MPIAIJ *b; 2636 
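  /*
     Preallocation (re)builds the two sequential blocks: any existing
     column map, ghost array, ghost vector and scatter are destroyed,
     the off-diagonal block b->B is destroyed and recreated because its
     number of columns may change between calls, the diagonal block b->A
     is created on the first call only, and d_nz/d_nnz and o_nz/o_nnz
     are forwarded to MatSeqAIJSetPreallocation() on b->A and b->B
     respectively.

     Illustrative caller-side sketch (a minimal sketch; the sizes and
     nonzero counts are made up):

        ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
        ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
        ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
        ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
  */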
PetscErrorCode ierr; 2637 2638 PetscFunctionBegin; 2639 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2640 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2641 b = (Mat_MPIAIJ*)B->data; 2642 2643 #if defined(PETSC_USE_CTABLE) 2644 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2645 #else 2646 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2647 #endif 2648 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2649 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2650 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2651 2652 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2653 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2654 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2655 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2656 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2657 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2658 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2659 2660 if (!B->preallocated) { 2661 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2662 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2663 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2664 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2665 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2666 } 2667 2668 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2669 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2670 B->preallocated = PETSC_TRUE; 2671 B->was_assembled = PETSC_FALSE; 2672 B->assembled = PETSC_FALSE;; 2673 PetscFunctionReturn(0); 2674 } 2675 2676 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2677 { 2678 Mat mat; 2679 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2680 PetscErrorCode ierr; 2681 2682 PetscFunctionBegin; 2683 *newmat = 0; 2684 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2685 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2686 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2687 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2688 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2689 a = (Mat_MPIAIJ*)mat->data; 2690 2691 mat->factortype = matin->factortype; 2692 mat->assembled = PETSC_TRUE; 2693 mat->insertmode = NOT_SET_VALUES; 2694 mat->preallocated = PETSC_TRUE; 2695 2696 a->size = oldmat->size; 2697 a->rank = oldmat->rank; 2698 a->donotstash = oldmat->donotstash; 2699 a->roworiented = oldmat->roworiented; 2700 a->rowindices = 0; 2701 a->rowvalues = 0; 2702 a->getrowactive = PETSC_FALSE; 2703 2704 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2705 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2706 2707 if (oldmat->colmap) { 2708 #if defined(PETSC_USE_CTABLE) 2709 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2710 #else 2711 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2712 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2713 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2714 #endif 2715 } else a->colmap = 0; 2716 if (oldmat->garray) { 2717 PetscInt len; 2718 len = oldmat->B->cmap->n; 2719 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2720 ierr = 
PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2721 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2722 } else a->garray = 0; 2723 2724 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2725 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2726 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2727 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2728 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2729 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2730 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2731 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2732 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2733 *newmat = mat; 2734 PetscFunctionReturn(0); 2735 } 2736 2737 2738 2739 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2740 { 2741 PetscScalar *vals,*svals; 2742 MPI_Comm comm; 2743 PetscErrorCode ierr; 2744 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2745 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2746 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2747 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2748 PetscInt cend,cstart,n,*rowners; 2749 int fd; 2750 PetscInt bs = newMat->rmap->bs; 2751 2752 PetscFunctionBegin; 2753 /* force binary viewer to load .info file if it has not yet done so */ 2754 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2755 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2756 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2757 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2758 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2759 if (!rank) { 2760 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2761 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2762 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2763 } 2764 2765 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2766 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2767 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2768 if (bs < 0) bs = 1; 2769 2770 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2771 M = header[1]; N = header[2]; 2772 2773 /* If global sizes are set, check if they are consistent with that given in the file */ 2774 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2775 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2776 2777 /* determine ownership of all (block) rows */ 2778 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2779 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2780 else m = newMat->rmap->n; /* Set by user */ 2781 2782 ierr = 
PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2783 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2784 2785 /* First process needs enough room for process with most rows */ 2786 if (!rank) { 2787 mmax = rowners[1]; 2788 for (i=2; i<=size; i++) { 2789 mmax = PetscMax(mmax, rowners[i]); 2790 } 2791 } else mmax = -1; /* unused, but compilers complain */ 2792 2793 rowners[0] = 0; 2794 for (i=2; i<=size; i++) { 2795 rowners[i] += rowners[i-1]; 2796 } 2797 rstart = rowners[rank]; 2798 rend = rowners[rank+1]; 2799 2800 /* distribute row lengths to all processors */ 2801 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2802 if (!rank) { 2803 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2804 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2805 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2806 for (j=0; j<m; j++) { 2807 procsnz[0] += ourlens[j]; 2808 } 2809 for (i=1; i<size; i++) { 2810 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2811 /* calculate the number of nonzeros on each processor */ 2812 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2813 procsnz[i] += rowlengths[j]; 2814 } 2815 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2816 } 2817 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2818 } else { 2819 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2820 } 2821 2822 if (!rank) { 2823 /* determine max buffer needed and allocate it */ 2824 maxnz = 0; 2825 for (i=0; i<size; i++) { 2826 maxnz = PetscMax(maxnz,procsnz[i]); 2827 } 2828 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2829 2830 /* read in my part of the matrix column indices */ 2831 nz = procsnz[0]; 2832 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2833 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2834 2835 /* read in every one elses and ship off */ 2836 for (i=1; i<size; i++) { 2837 nz = procsnz[i]; 2838 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2839 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2840 } 2841 ierr = PetscFree(cols);CHKERRQ(ierr); 2842 } else { 2843 /* determine buffer space needed for message */ 2844 nz = 0; 2845 for (i=0; i<m; i++) { 2846 nz += ourlens[i]; 2847 } 2848 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2849 2850 /* receive message of column indices*/ 2851 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2852 } 2853 2854 /* determine column ownership if matrix is not square */ 2855 if (N != M) { 2856 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 2857 else n = newMat->cmap->n; 2858 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2859 cstart = cend - n; 2860 } else { 2861 cstart = rstart; 2862 cend = rend; 2863 n = cend - cstart; 2864 } 2865 2866 /* loop over local rows, determining number of off diagonal entries */ 2867 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2868 jj = 0; 2869 for (i=0; i<m; i++) { 2870 for (j=0; j<ourlens[i]; j++) { 2871 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2872 jj++; 2873 } 2874 } 2875 2876 for (i=0; i<m; i++) { 2877 ourlens[i] -= offlens[i]; 2878 } 2879 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 2880 2881 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 2882 2883 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 2884 2885 for (i=0; i<m; i++) { 2886 ourlens[i] += offlens[i]; 2887 } 2888 2889 if (!rank) { 2890 ierr = 
PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 2891 2892 /* read in my part of the matrix numerical values */ 2893 nz = procsnz[0]; 2894 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2895 2896 /* insert into matrix */ 2897 jj = rstart; 2898 smycols = mycols; 2899 svals = vals; 2900 for (i=0; i<m; i++) { 2901 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2902 smycols += ourlens[i]; 2903 svals += ourlens[i]; 2904 jj++; 2905 } 2906 2907 /* read in other processors and ship out */ 2908 for (i=1; i<size; i++) { 2909 nz = procsnz[i]; 2910 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2911 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 2912 } 2913 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2914 } else { 2915 /* receive numeric values */ 2916 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 2917 2918 /* receive message of values*/ 2919 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 2920 2921 /* insert into matrix */ 2922 jj = rstart; 2923 smycols = mycols; 2924 svals = vals; 2925 for (i=0; i<m; i++) { 2926 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2927 smycols += ourlens[i]; 2928 svals += ourlens[i]; 2929 jj++; 2930 } 2931 } 2932 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2933 ierr = PetscFree(vals);CHKERRQ(ierr); 2934 ierr = PetscFree(mycols);CHKERRQ(ierr); 2935 ierr = PetscFree(rowners);CHKERRQ(ierr); 2936 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2937 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2938 PetscFunctionReturn(0); 2939 } 2940 2941 /* TODO: Not scalable because of ISAllGather() unless getting all columns. 
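   ISAllGather() concatenates every process' piece of iscol onto each
   process, so every rank ends up storing the full global column index
   list.  The only case avoided below is when iscol is collectively
   detected to be a stride covering all columns, in which case an
   identity stride IS is used instead.  Hypothetical usage (isrow and
   iscol are parallel index sets chosen by the caller):

      ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);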
*/ 2942 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2943 { 2944 PetscErrorCode ierr; 2945 IS iscol_local; 2946 PetscInt csize; 2947 2948 PetscFunctionBegin; 2949 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2950 if (call == MAT_REUSE_MATRIX) { 2951 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2952 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2953 } else { 2954 /* check if we are grabbing all columns*/ 2955 PetscBool isstride; 2956 PetscMPIInt lisstride = 0,gisstride; 2957 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 2958 if (isstride) { 2959 PetscInt start,len,mstart,mlen; 2960 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 2961 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 2962 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 2963 if (mstart == start && mlen-mstart == len) lisstride = 1; 2964 } 2965 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2966 if (gisstride) { 2967 PetscInt N; 2968 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 2969 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 2970 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 2971 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 2972 } else { 2973 PetscInt cbs; 2974 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 2975 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2976 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 2977 } 2978 } 2979 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2980 if (call == MAT_INITIAL_MATRIX) { 2981 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2982 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 2983 } 2984 PetscFunctionReturn(0); 2985 } 2986 2987 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 2988 /* 2989 Not great since it makes two copies of the submatrix, first an SeqAIJ 2990 in local and then by concatenating the local matrices the end result. 2991 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2992 2993 Note: This requires a sequential iscol with all indices. 
2994 */ 2995 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2996 { 2997 PetscErrorCode ierr; 2998 PetscMPIInt rank,size; 2999 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3000 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3001 PetscBool allcolumns, colflag; 3002 Mat M,Mreuse; 3003 MatScalar *vwork,*aa; 3004 MPI_Comm comm; 3005 Mat_SeqAIJ *aij; 3006 3007 PetscFunctionBegin; 3008 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3009 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3010 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3011 3012 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3013 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3014 if (colflag && ncol == mat->cmap->N) { 3015 allcolumns = PETSC_TRUE; 3016 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix\n");CHKERRQ(ierr); 3017 } else { 3018 allcolumns = PETSC_FALSE; 3019 } 3020 if (call == MAT_REUSE_MATRIX) { 3021 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3022 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3023 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3024 } else { 3025 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3026 } 3027 3028 /* 3029 m - number of local rows 3030 n - number of columns (same on all processors) 3031 rstart - first row in new global matrix generated 3032 */ 3033 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3034 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3035 if (call == MAT_INITIAL_MATRIX) { 3036 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3037 ii = aij->i; 3038 jj = aij->j; 3039 3040 /* 3041 Determine the number of non-zeros in the diagonal and off-diagonal 3042 portions of the matrix in order to do correct preallocation 3043 */ 3044 3045 /* first get start and end of "diagonal" columns */ 3046 if (csize == PETSC_DECIDE) { 3047 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3048 if (mglobal == n) { /* square matrix */ 3049 nlocal = m; 3050 } else { 3051 nlocal = n/size + ((n % size) > rank); 3052 } 3053 } else { 3054 nlocal = csize; 3055 } 3056 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3057 rstart = rend - nlocal; 3058 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3059 3060 /* next, compute all the lengths */ 3061 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3062 olens = dlens + m; 3063 for (i=0; i<m; i++) { 3064 jend = ii[i+1] - ii[i]; 3065 olen = 0; 3066 dlen = 0; 3067 for (j=0; j<jend; j++) { 3068 if (*jj < rstart || *jj >= rend) olen++; 3069 else dlen++; 3070 jj++; 3071 } 3072 olens[i] = olen; 3073 dlens[i] = dlen; 3074 } 3075 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3076 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3077 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3078 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3079 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3080 ierr = PetscFree(dlens);CHKERRQ(ierr); 3081 } else { 3082 PetscInt ml,nl; 3083 3084 M = *newmat; 3085 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3086 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same 
size/layout as request"); 3087 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3088 /* 3089 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3090 rather than the slower MatSetValues(). 3091 */ 3092 M->was_assembled = PETSC_TRUE; 3093 M->assembled = PETSC_FALSE; 3094 } 3095 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3096 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3097 ii = aij->i; 3098 jj = aij->j; 3099 aa = aij->a; 3100 for (i=0; i<m; i++) { 3101 row = rstart + i; 3102 nz = ii[i+1] - ii[i]; 3103 cwork = jj; jj += nz; 3104 vwork = aa; aa += nz; 3105 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3106 } 3107 3108 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3109 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3110 *newmat = M; 3111 3112 /* save submatrix used in processor for next request */ 3113 if (call == MAT_INITIAL_MATRIX) { 3114 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3115 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3116 } 3117 PetscFunctionReturn(0); 3118 } 3119 3120 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3121 { 3122 PetscInt m,cstart,cend,j,nnz,i,d; 3123 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3124 const PetscInt *JJ; 3125 PetscScalar *values; 3126 PetscErrorCode ierr; 3127 3128 PetscFunctionBegin; 3129 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]); 3130 3131 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3132 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3133 m = B->rmap->n; 3134 cstart = B->cmap->rstart; 3135 cend = B->cmap->rend; 3136 rstart = B->rmap->rstart; 3137 3138 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3139 3140 #if defined(PETSC_USE_DEBUG) 3141 for (i=0; i<m; i++) { 3142 nnz = Ii[i+1] - Ii[i]; 3143 JJ = J + Ii[i]; 3144 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz); 3145 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]); 3146 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3147 } 3148 #endif 3149 3150 for (i=0; i<m; i++) { 3151 nnz = Ii[i+1] - Ii[i]; 3152 JJ = J + Ii[i]; 3153 nnz_max = PetscMax(nnz_max,nnz); 3154 d = 0; 3155 for (j=0; j<nnz; j++) { 3156 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3157 } 3158 d_nnz[i] = d; 3159 o_nnz[i] = nnz - d; 3160 } 3161 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3162 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3163 3164 if (v) values = (PetscScalar*)v; 3165 else { 3166 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3167 } 3168 3169 for (i=0; i<m; i++) { 3170 ii = i + rstart; 3171 nnz = Ii[i+1] - Ii[i]; 3172 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3173 } 3174 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3175 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3176 3177 if (!v) { 3178 ierr = PetscFree(values);CHKERRQ(ierr); 3179 } 3180 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3181 PetscFunctionReturn(0); 3182 } 3183 3184 /*@ 3185 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3186 (the default parallel PETSc format). 3187 3188 Collective on MPI_Comm 3189 3190 Input Parameters: 3191 + B - the matrix 3192 . i - the indices into j for the start of each local row (starts with zero) 3193 . j - the column indices for each local row (starts with zero) 3194 - v - optional values in the matrix 3195 3196 Level: developer 3197 3198 Notes: 3199 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3200 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3201 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3202 3203 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3204 3205 The format which is used for the sparse matrix input, is equivalent to a 3206 row-major ordering.. i.e for the following matrix, the input data expected is 3207 as shown 3208 3209 $ 1 0 0 3210 $ 2 0 3 P0 3211 $ ------- 3212 $ 4 5 6 P1 3213 $ 3214 $ Process0 [P0]: rows_owned=[0,1] 3215 $ i = {0,1,3} [size = nrow+1 = 2+1] 3216 $ j = {0,0,2} [size = 3] 3217 $ v = {1,2,3} [size = 3] 3218 $ 3219 $ Process1 [P1]: rows_owned=[2] 3220 $ i = {0,3} [size = nrow+1 = 1+1] 3221 $ j = {0,1,2} [size = 3] 3222 $ v = {4,5,6} [size = 3] 3223 3224 .keywords: matrix, aij, compressed row, sparse, parallel 3225 3226 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3227 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3228 @*/ 3229 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3230 { 3231 PetscErrorCode ierr; 3232 3233 PetscFunctionBegin; 3234 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3235 PetscFunctionReturn(0); 3236 } 3237 3238 /*@C 3239 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3240 (the default parallel PETSc format). For good matrix assembly performance 3241 the user should preallocate the matrix storage by setting the parameters 3242 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3243 performance can be increased by more than a factor of 50. 3244 3245 Collective on MPI_Comm 3246 3247 Input Parameters: 3248 + B - the matrix 3249 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3250 (same value is used for all local rows) 3251 . d_nnz - array containing the number of nonzeros in the various rows of the 3252 DIAGONAL portion of the local submatrix (possibly different for each row) 3253 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3254 The size of this array is equal to the number of local rows, i.e 'm'. 3255 For matrices that will be factored, you must leave room for (and set) 3256 the diagonal entry even if it is zero. 3257 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3258 submatrix (same value is used for all local rows). 3259 - o_nnz - array containing the number of nonzeros in the various rows of the 3260 OFF-DIAGONAL portion of the local submatrix (possibly different for 3261 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3262 structure. The size of this array is equal to the number 3263 of local rows, i.e 'm'. 3264 3265 If the *_nnz parameter is given then the *_nz parameter is ignored 3266 3267 The AIJ format (also called the Yale sparse matrix format or 3268 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3269 storage. The stored row and column indices begin with zero. 3270 See Users-Manual: ch_mat for details. 3271 3272 The parallel matrix is partitioned such that the first m0 rows belong to 3273 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3274 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3275 3276 The DIAGONAL portion of the local submatrix of a processor can be defined 3277 as the submatrix which is obtained by extraction the part corresponding to 3278 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3279 first row that belongs to the processor, r2 is the last row belonging to 3280 the this processor, and c1-c2 is range of indices of the local part of a 3281 vector suitable for applying the matrix to. This is an mxn matrix. In the 3282 common case of a square matrix, the row and column ranges are the same and 3283 the DIAGONAL part is also square. The remaining portion of the local 3284 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3285 3286 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3287 3288 You can call MatGetInfo() to get information on how effective the preallocation was; 3289 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3290 You can also run with the option -info and look for messages with the string 3291 malloc in them to see if additional memory allocation was needed. 3292 3293 Example usage: 3294 3295 Consider the following 8x8 matrix with 34 non-zero values, that is 3296 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3297 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3298 as follows: 3299 3300 .vb 3301 1 2 0 | 0 3 0 | 0 4 3302 Proc0 0 5 6 | 7 0 0 | 8 0 3303 9 0 10 | 11 0 0 | 12 0 3304 ------------------------------------- 3305 13 0 14 | 15 16 17 | 0 0 3306 Proc1 0 18 0 | 19 20 21 | 0 0 3307 0 0 0 | 22 23 0 | 24 0 3308 ------------------------------------- 3309 Proc2 25 26 27 | 0 0 28 | 29 0 3310 30 0 0 | 31 32 33 | 0 34 3311 .ve 3312 3313 This can be represented as a collection of submatrices as: 3314 3315 .vb 3316 A B C 3317 D E F 3318 G H I 3319 .ve 3320 3321 Where the submatrices A,B,C are owned by proc0, D,E,F are 3322 owned by proc1, G,H,I are owned by proc2. 3323 3324 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3325 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3326 The 'M','N' parameters are 8,8, and have the same values on all procs. 3327 3328 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3329 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3330 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3331 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3332 part as SeqAIJ matrices. 
for eg: proc1 will store [E] as a SeqAIJ 3333 matrix, ans [DF] as another SeqAIJ matrix. 3334 3335 When d_nz, o_nz parameters are specified, d_nz storage elements are 3336 allocated for every row of the local diagonal submatrix, and o_nz 3337 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3338 One way to choose d_nz and o_nz is to use the max nonzerors per local 3339 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3340 In this case, the values of d_nz,o_nz are: 3341 .vb 3342 proc0 : dnz = 2, o_nz = 2 3343 proc1 : dnz = 3, o_nz = 2 3344 proc2 : dnz = 1, o_nz = 4 3345 .ve 3346 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3347 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3348 for proc3. i.e we are using 12+15+10=37 storage locations to store 3349 34 values. 3350 3351 When d_nnz, o_nnz parameters are specified, the storage is specified 3352 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3353 In the above case the values for d_nnz,o_nnz are: 3354 .vb 3355 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3356 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3357 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3358 .ve 3359 Here the space allocated is sum of all the above values i.e 34, and 3360 hence pre-allocation is perfect. 3361 3362 Level: intermediate 3363 3364 .keywords: matrix, aij, compressed row, sparse, parallel 3365 3366 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3367 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 3368 @*/ 3369 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3370 { 3371 PetscErrorCode ierr; 3372 3373 PetscFunctionBegin; 3374 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3375 PetscValidType(B,1); 3376 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3377 PetscFunctionReturn(0); 3378 } 3379 3380 /*@ 3381 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3382 CSR format the local rows. 3383 3384 Collective on MPI_Comm 3385 3386 Input Parameters: 3387 + comm - MPI communicator 3388 . m - number of local rows (Cannot be PETSC_DECIDE) 3389 . n - This value should be the same as the local size used in creating the 3390 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3391 calculated if N is given) For square matrices n is almost always m. 3392 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3393 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3394 . i - row indices 3395 . j - column indices 3396 - a - matrix values 3397 3398 Output Parameter: 3399 . mat - the matrix 3400 3401 Level: intermediate 3402 3403 Notes: 3404 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3405 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3406 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3407 3408 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3409 3410 The format which is used for the sparse matrix input, is equivalent to a 3411 row-major ordering.. 
i.e for the following matrix, the input data expected is 3412 as shown 3413 3414 $ 1 0 0 3415 $ 2 0 3 P0 3416 $ ------- 3417 $ 4 5 6 P1 3418 $ 3419 $ Process0 [P0]: rows_owned=[0,1] 3420 $ i = {0,1,3} [size = nrow+1 = 2+1] 3421 $ j = {0,0,2} [size = 3] 3422 $ v = {1,2,3} [size = 3] 3423 $ 3424 $ Process1 [P1]: rows_owned=[2] 3425 $ i = {0,3} [size = nrow+1 = 1+1] 3426 $ j = {0,1,2} [size = 3] 3427 $ v = {4,5,6} [size = 3] 3428 3429 .keywords: matrix, aij, compressed row, sparse, parallel 3430 3431 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3432 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3433 @*/ 3434 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3435 { 3436 PetscErrorCode ierr; 3437 3438 PetscFunctionBegin; 3439 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3440 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3441 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3442 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3443 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 3444 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3445 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3446 PetscFunctionReturn(0); 3447 } 3448 3449 /*@C 3450 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 3451 (the default parallel PETSc format). For good matrix assembly performance 3452 the user should preallocate the matrix storage by setting the parameters 3453 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3454 performance can be increased by more than a factor of 50. 3455 3456 Collective on MPI_Comm 3457 3458 Input Parameters: 3459 + comm - MPI communicator 3460 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3461 This value should be the same as the local size used in creating the 3462 y vector for the matrix-vector product y = Ax. 3463 . n - This value should be the same as the local size used in creating the 3464 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3465 calculated if N is given) For square matrices n is almost always m. 3466 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3467 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3468 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3469 (same value is used for all local rows) 3470 . d_nnz - array containing the number of nonzeros in the various rows of the 3471 DIAGONAL portion of the local submatrix (possibly different for each row) 3472 or NULL, if d_nz is used to specify the nonzero structure. 3473 The size of this array is equal to the number of local rows, i.e 'm'. 3474 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3475 submatrix (same value is used for all local rows). 3476 - o_nnz - array containing the number of nonzeros in the various rows of the 3477 OFF-DIAGONAL portion of the local submatrix (possibly different for 3478 each row) or NULL, if o_nz is used to specify the nonzero 3479 structure. The size of this array is equal to the number 3480 of local rows, i.e 'm'. 3481 3482 Output Parameter: 3483 . 
A - the matrix 3484 3485 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3486 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3487 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3488 3489 Notes: 3490 If the *_nnz parameter is given then the *_nz parameter is ignored 3491 3492 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3493 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3494 storage requirements for this matrix. 3495 3496 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3497 processor than it must be used on all processors that share the object for 3498 that argument. 3499 3500 The user MUST specify either the local or global matrix dimensions 3501 (possibly both). 3502 3503 The parallel matrix is partitioned across processors such that the 3504 first m0 rows belong to process 0, the next m1 rows belong to 3505 process 1, the next m2 rows belong to process 2 etc.. where 3506 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3507 values corresponding to [m x N] submatrix. 3508 3509 The columns are logically partitioned with the n0 columns belonging 3510 to 0th partition, the next n1 columns belonging to the next 3511 partition etc.. where n0,n1,n2... are the input parameter 'n'. 3512 3513 The DIAGONAL portion of the local submatrix on any given processor 3514 is the submatrix corresponding to the rows and columns m,n 3515 corresponding to the given processor. i.e diagonal matrix on 3516 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3517 etc. The remaining portion of the local submatrix [m x (N-n)] 3518 constitute the OFF-DIAGONAL portion. The example below better 3519 illustrates this concept. 3520 3521 For a square global matrix we define each processor's diagonal portion 3522 to be its local rows and the corresponding columns (a square submatrix); 3523 each processor's off-diagonal portion encompasses the remainder of the 3524 local matrix (a rectangular submatrix). 3525 3526 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3527 3528 When calling this routine with a single process communicator, a matrix of 3529 type SEQAIJ is returned. If a matrix of type MATMPIAIJ is desired for this 3530 type of communicator, use the construction mechanism: 3531 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3532 3533 By default, this format uses inodes (identical nodes) when possible. 3534 We search for consecutive rows with the same nonzero structure, thereby 3535 reusing matrix information to achieve increased efficiency. 3536 3537 Options Database Keys: 3538 + -mat_no_inode - Do not use inodes 3539 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3540 - -mat_aij_oneindex - Internally use indexing starting at 1 3541 rather than 0. Note that when calling MatSetValues(), 3542 the user still MUST index entries starting at 0! 3543 3544 3545 Example usage: 3546 3547 Consider the following 8x8 matrix with 34 non-zero values, that is 3548 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3549 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 3550 as follows: 3551 3552 .vb 3553 1 2 0 | 0 3 0 | 0 4 3554 Proc0 0 5 6 | 7 0 0 | 8 0 3555 9 0 10 | 11 0 0 | 12 0 3556 ------------------------------------- 3557 13 0 14 | 15 16 17 | 0 0 3558 Proc1 0 18 0 | 19 20 21 | 0 0 3559 0 0 0 | 22 23 0 | 24 0 3560 ------------------------------------- 3561 Proc2 25 26 27 | 0 0 28 | 29 0 3562 30 0 0 | 31 32 33 | 0 34 3563 .ve 3564 3565 This can be represented as a collection of submatrices as: 3566 3567 .vb 3568 A B C 3569 D E F 3570 G H I 3571 .ve 3572 3573 Where the submatrices A,B,C are owned by proc0, D,E,F are 3574 owned by proc1, G,H,I are owned by proc2. 3575 3576 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3577 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3578 The 'M','N' parameters are 8,8, and have the same values on all procs. 3579 3580 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3581 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3582 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3583 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3584 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 3585 matrix, and [DF] as another SeqAIJ matrix. 3586 3587 When the d_nz, o_nz parameters are specified, d_nz storage elements are 3588 allocated for every row of the local DIAGONAL submatrix, and o_nz 3589 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3590 One way to choose d_nz and o_nz is to use the maximum number of nonzeros over the local 3591 rows of each of the local DIAGONAL and OFF-DIAGONAL submatrices. 3592 In this case, the values of d_nz,o_nz are: 3593 .vb 3594 proc0 : d_nz = 2, o_nz = 2 3595 proc1 : d_nz = 3, o_nz = 2 3596 proc2 : d_nz = 1, o_nz = 4 3597 .ve 3598 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3599 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3600 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3601 34 values. 3602 3603 When the d_nnz, o_nnz parameters are specified, the storage is specified 3604 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 3605 In the above case the values for d_nnz,o_nnz are: 3606 .vb 3607 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3608 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3609 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3610 .ve 3611 Here the space allocated is the sum of all of the above values, i.e. 34, and 3612 hence the preallocation is perfect.
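   A minimal calling sketch, for illustration only, of how proc0 of the example above might create this matrix using the exact per-row counts (the communicator, variable names, and the omitted MatSetValues()/assembly calls are illustrative; the other processes make the analogous call with their own local sizes and nnz arrays):

.vb
      Mat      A;
      PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};   /* per-row counts on proc0 for the 8x8 example */

      ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
      /* ... fill with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ... */
.ve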
3613 3614 Level: intermediate 3615 3616 .keywords: matrix, aij, compressed row, sparse, parallel 3617 3618 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3619 MATMPIAIJ, MatCreateMPIAIJWithArrays() 3620 @*/ 3621 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3622 { 3623 PetscErrorCode ierr; 3624 PetscMPIInt size; 3625 3626 PetscFunctionBegin; 3627 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3628 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3629 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3630 if (size > 1) { 3631 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3632 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3633 } else { 3634 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3635 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3636 } 3637 PetscFunctionReturn(0); 3638 } 3639 3640 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3641 { 3642 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3643 PetscBool flg; 3644 PetscErrorCode ierr; 3645 3646 PetscFunctionBegin; 3647 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr); 3648 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 3649 if (Ad) *Ad = a->A; 3650 if (Ao) *Ao = a->B; 3651 if (colmap) *colmap = a->garray; 3652 PetscFunctionReturn(0); 3653 } 3654 3655 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3656 { 3657 PetscErrorCode ierr; 3658 PetscInt m,N,i,rstart,nnz,Ii; 3659 PetscInt *indx; 3660 PetscScalar *values; 3661 3662 PetscFunctionBegin; 3663 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3664 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3665 PetscInt *dnz,*onz,sum,bs,cbs; 3666 3667 if (n == PETSC_DECIDE) { 3668 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3669 } 3670 /* Check sum(n) = N */ 3671 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3672 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 3673 3674 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3675 rstart -= m; 3676 3677 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3678 for (i=0; i<m; i++) { 3679 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3680 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3681 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3682 } 3683 3684 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3685 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3686 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3687 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3688 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3689 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3690 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3691 } 3692 3693 /* numeric phase */ 3694 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3695 for (i=0; i<m; i++) { 3696 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3697 Ii = i + rstart; 3698 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3699 ierr = 
MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3700 } 3701 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3702 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3703 PetscFunctionReturn(0); 3704 } 3705 3706 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3707 { 3708 PetscErrorCode ierr; 3709 PetscMPIInt rank; 3710 PetscInt m,N,i,rstart,nnz; 3711 size_t len; 3712 const PetscInt *indx; 3713 PetscViewer out; 3714 char *name; 3715 Mat B; 3716 const PetscScalar *values; 3717 3718 PetscFunctionBegin; 3719 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3720 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3721 /* Should this be the type of the diagonal block of A? */ 3722 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3723 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3724 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 3725 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3726 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 3727 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3728 for (i=0; i<m; i++) { 3729 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3730 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3731 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3732 } 3733 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3734 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3735 3736 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 3737 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3738 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 3739 sprintf(name,"%s.%d",outfile,rank); 3740 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3741 ierr = PetscFree(name);CHKERRQ(ierr); 3742 ierr = MatView(B,out);CHKERRQ(ierr); 3743 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 3744 ierr = MatDestroy(&B);CHKERRQ(ierr); 3745 PetscFunctionReturn(0); 3746 } 3747 3748 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 3749 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3750 { 3751 PetscErrorCode ierr; 3752 Mat_Merge_SeqsToMPI *merge; 3753 PetscContainer container; 3754 3755 PetscFunctionBegin; 3756 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 3757 if (container) { 3758 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 3759 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3760 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3761 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3762 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3763 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3764 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 3765 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3766 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 3767 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3768 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3769 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3770 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3771 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 3772 ierr = PetscFree(merge);CHKERRQ(ierr); 3773 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3774 } 3775 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3776 PetscFunctionReturn(0); 3777 } 3778 3779 #include <../src/mat/utils/freespace.h> 3780 #include <petscbt.h> 3781 3782 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 3783 { 3784 PetscErrorCode ierr; 3785 MPI_Comm comm; 3786 Mat_SeqAIJ *a 
=(Mat_SeqAIJ*)seqmat->data; 3787 PetscMPIInt size,rank,taga,*len_s; 3788 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 3789 PetscInt proc,m; 3790 PetscInt **buf_ri,**buf_rj; 3791 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3792 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3793 MPI_Request *s_waits,*r_waits; 3794 MPI_Status *status; 3795 MatScalar *aa=a->a; 3796 MatScalar **abuf_r,*ba_i; 3797 Mat_Merge_SeqsToMPI *merge; 3798 PetscContainer container; 3799 3800 PetscFunctionBegin; 3801 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 3802 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3803 3804 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3805 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3806 3807 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 3808 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 3809 3810 bi = merge->bi; 3811 bj = merge->bj; 3812 buf_ri = merge->buf_ri; 3813 buf_rj = merge->buf_rj; 3814 3815 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 3816 owners = merge->rowmap->range; 3817 len_s = merge->len_s; 3818 3819 /* send and recv matrix values */ 3820 /*-----------------------------*/ 3821 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3822 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3823 3824 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 3825 for (proc=0,k=0; proc<size; proc++) { 3826 if (!len_s[proc]) continue; 3827 i = owners[proc]; 3828 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 3829 k++; 3830 } 3831 3832 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 3833 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 3834 ierr = PetscFree(status);CHKERRQ(ierr); 3835 3836 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3837 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3838 3839 /* insert mat values of mpimat */ 3840 /*----------------------------*/ 3841 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 3842 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 3843 3844 for (k=0; k<merge->nrecv; k++) { 3845 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3846 nrows = *(buf_ri_k[k]); 3847 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 3848 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 3849 } 3850 3851 /* set values of ba */ 3852 m = merge->rowmap->n; 3853 for (i=0; i<m; i++) { 3854 arow = owners[rank] + i; 3855 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 3856 bnzi = bi[i+1] - bi[i]; 3857 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 3858 3859 /* add local non-zero vals of this proc's seqmat into ba */ 3860 anzi = ai[arow+1] - ai[arow]; 3861 aj = a->j + ai[arow]; 3862 aa = a->a + ai[arow]; 3863 nextaj = 0; 3864 for (j=0; nextaj<anzi; j++) { 3865 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 3866 ba_i[j] += aa[nextaj++]; 3867 } 3868 } 3869 3870 /* add received vals into ba */ 3871 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 3872 /* i-th row */ 3873 if (i == *nextrow[k]) { 3874 anzi = *(nextai[k]+1) - *nextai[k]; 3875 aj = buf_rj[k] + *(nextai[k]); 3876 aa = abuf_r[k] + *(nextai[k]); 3877 nextaj = 0; 3878 for (j=0; nextaj<anzi; j++) { 
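/* scan the assembled (sorted) column list bj_i of this row; whenever a received column aj[nextaj] matches, accumulate its value into the corresponding slot of ba_i */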
3879 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 3880 ba_i[j] += aa[nextaj++]; 3881 } 3882 } 3883 nextrow[k]++; nextai[k]++; 3884 } 3885 } 3886 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3887 } 3888 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3889 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3890 3891 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 3892 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3893 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3894 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 3895 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3896 PetscFunctionReturn(0); 3897 } 3898 3899 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 3900 3901 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3902 { 3903 PetscErrorCode ierr; 3904 Mat B_mpi; 3905 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3906 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3907 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3908 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 3909 PetscInt len,proc,*dnz,*onz,bs,cbs; 3910 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3911 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3912 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3913 MPI_Status *status; 3914 PetscFreeSpaceList free_space=NULL,current_space=NULL; 3915 PetscBT lnkbt; 3916 Mat_Merge_SeqsToMPI *merge; 3917 PetscContainer container; 3918 3919 PetscFunctionBegin; 3920 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3921 3922 /* make sure it is a PETSc comm */ 3923 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 3924 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3925 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3926 3927 ierr = PetscNew(&merge);CHKERRQ(ierr); 3928 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 3929 3930 /* determine row ownership */ 3931 /*---------------------------------------------------------*/ 3932 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 3933 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 3934 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 3935 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 3936 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 3937 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 3938 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 3939 3940 m = merge->rowmap->n; 3941 owners = merge->rowmap->range; 3942 3943 /* determine the number of messages to send, their lengths */ 3944 /*---------------------------------------------------------*/ 3945 len_s = merge->len_s; 3946 3947 len = 0; /* length of buf_si[] */ 3948 merge->nsend = 0; 3949 for (proc=0; proc<size; proc++) { 3950 len_si[proc] = 0; 3951 if (proc == rank) { 3952 len_s[proc] = 0; 3953 } else { 3954 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3955 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3956 } 3957 if (len_s[proc]) { 3958 merge->nsend++; 3959 nrows = 0; 3960 for (i=owners[proc]; i<owners[proc+1]; i++) { 3961 if (ai[i+1] > ai[i]) nrows++; 3962 } 3963 len_si[proc] = 2*(nrows+1); 3964 len += len_si[proc]; 3965 } 3966 } 3967 3968 /* determine the number and length of messages to receive for ij-structure */ 3969 /*-------------------------------------------------------------------------*/ 3970 ierr = 
PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3971 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3972 3973 /* post the Irecv of j-structure */ 3974 /*-------------------------------*/ 3975 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 3976 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3977 3978 /* post the Isend of j-structure */ 3979 /*--------------------------------*/ 3980 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 3981 3982 for (proc=0, k=0; proc<size; proc++) { 3983 if (!len_s[proc]) continue; 3984 i = owners[proc]; 3985 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3986 k++; 3987 } 3988 3989 /* receives and sends of j-structure are complete */ 3990 /*------------------------------------------------*/ 3991 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 3992 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 3993 3994 /* send and recv i-structure */ 3995 /*---------------------------*/ 3996 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 3997 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3998 3999 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4000 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4001 for (proc=0,k=0; proc<size; proc++) { 4002 if (!len_s[proc]) continue; 4003 /* form outgoing message for i-structure: 4004 buf_si[0]: nrows to be sent 4005 [1:nrows]: row index (global) 4006 [nrows+1:2*nrows+1]: i-structure index 4007 */ 4008 /*-------------------------------------------*/ 4009 nrows = len_si[proc]/2 - 1; 4010 buf_si_i = buf_si + nrows+1; 4011 buf_si[0] = nrows; 4012 buf_si_i[0] = 0; 4013 nrows = 0; 4014 for (i=owners[proc]; i<owners[proc+1]; i++) { 4015 anzi = ai[i+1] - ai[i]; 4016 if (anzi) { 4017 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4018 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4019 nrows++; 4020 } 4021 } 4022 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4023 k++; 4024 buf_si += len_si[proc]; 4025 } 4026 4027 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4028 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4029 4030 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4031 for (i=0; i<merge->nrecv; i++) { 4032 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4033 } 4034 4035 ierr = PetscFree(len_si);CHKERRQ(ierr); 4036 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4037 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4038 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4039 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4040 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4041 ierr = PetscFree(status);CHKERRQ(ierr); 4042 4043 /* compute a local seq matrix in each processor */ 4044 /*----------------------------------------------*/ 4045 /* allocate bi array and free space for accumulating nonzero column info */ 4046 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4047 bi[0] = 0; 4048 4049 /* create and initialize a linked list */ 4050 nlnk = N+1; 4051 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4052 4053 /* initial 
FreeSpace size is 2*(num of local nnz(seqmat)) */ 4054 len = ai[owners[rank+1]] - ai[owners[rank]]; 4055 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4056 4057 current_space = free_space; 4058 4059 /* determine symbolic info for each local row */ 4060 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4061 4062 for (k=0; k<merge->nrecv; k++) { 4063 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4064 nrows = *buf_ri_k[k]; 4065 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4066 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4067 } 4068 4069 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4070 len = 0; 4071 for (i=0; i<m; i++) { 4072 bnzi = 0; 4073 /* add local non-zero cols of this proc's seqmat into lnk */ 4074 arow = owners[rank] + i; 4075 anzi = ai[arow+1] - ai[arow]; 4076 aj = a->j + ai[arow]; 4077 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4078 bnzi += nlnk; 4079 /* add received col data into lnk */ 4080 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4081 if (i == *nextrow[k]) { /* i-th row */ 4082 anzi = *(nextai[k]+1) - *nextai[k]; 4083 aj = buf_rj[k] + *nextai[k]; 4084 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4085 bnzi += nlnk; 4086 nextrow[k]++; nextai[k]++; 4087 } 4088 } 4089 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4090 4091 /* if free space is not available, make more free space */ 4092 if (current_space->local_remaining<bnzi) { 4093 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4094 nspacedouble++; 4095 } 4096 /* copy data into free space, then initialize lnk */ 4097 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4098 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4099 4100 current_space->array += bnzi; 4101 current_space->local_used += bnzi; 4102 current_space->local_remaining -= bnzi; 4103 4104 bi[i+1] = bi[i] + bnzi; 4105 } 4106 4107 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4108 4109 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4110 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4111 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4112 4113 /* create symbolic parallel matrix B_mpi */ 4114 /*---------------------------------------*/ 4115 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4116 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4117 if (n==PETSC_DECIDE) { 4118 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4119 } else { 4120 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4121 } 4122 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4123 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4124 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4125 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4126 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4127 4128 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4129 B_mpi->assembled = PETSC_FALSE; 4130 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4131 merge->bi = bi; 4132 merge->bj = bj; 4133 merge->buf_ri = buf_ri; 4134 merge->buf_rj = buf_rj; 4135 merge->coi = NULL; 4136 merge->coj = NULL; 4137 merge->owners_co = NULL; 4138 
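/* release the reference to comm taken by PetscCommDuplicate() at the top of this routine */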
4139 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4140 4141 /* attach the supporting struct to B_mpi for reuse */ 4142 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4143 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4144 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4145 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4146 *mpimat = B_mpi; 4147 4148 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4149 PetscFunctionReturn(0); 4150 } 4151 4152 /*@C 4153 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4154 matrices from each processor 4155 4156 Collective on MPI_Comm 4157 4158 Input Parameters: 4159 + comm - the communicators the parallel matrix will live on 4160 . seqmat - the input sequential matrices 4161 . m - number of local rows (or PETSC_DECIDE) 4162 . n - number of local columns (or PETSC_DECIDE) 4163 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4164 4165 Output Parameter: 4166 . mpimat - the parallel matrix generated 4167 4168 Level: advanced 4169 4170 Notes: 4171 The dimensions of the sequential matrix in each processor MUST be the same. 4172 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4173 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4174 @*/ 4175 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4176 { 4177 PetscErrorCode ierr; 4178 PetscMPIInt size; 4179 4180 PetscFunctionBegin; 4181 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4182 if (size == 1) { 4183 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4184 if (scall == MAT_INITIAL_MATRIX) { 4185 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4186 } else { 4187 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4188 } 4189 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4190 PetscFunctionReturn(0); 4191 } 4192 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4193 if (scall == MAT_INITIAL_MATRIX) { 4194 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4195 } 4196 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4197 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4198 PetscFunctionReturn(0); 4199 } 4200 4201 /*@ 4202 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 4203 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4204 with MatGetSize() 4205 4206 Not Collective 4207 4208 Input Parameters: 4209 + A - the matrix 4210 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4211 4212 Output Parameter: 4213 . 
A_loc - the local sequential matrix generated 4214 4215 Level: developer 4216 4217 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4218 4219 @*/ 4220 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4221 { 4222 PetscErrorCode ierr; 4223 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4224 Mat_SeqAIJ *mat,*a,*b; 4225 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4226 MatScalar *aa,*ba,*cam; 4227 PetscScalar *ca; 4228 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4229 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4230 PetscBool match; 4231 MPI_Comm comm; 4232 PetscMPIInt size; 4233 4234 PetscFunctionBegin; 4235 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4236 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4237 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4238 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4239 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 4240 4241 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4242 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4243 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4244 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4245 aa = a->a; ba = b->a; 4246 if (scall == MAT_INITIAL_MATRIX) { 4247 if (size == 1) { 4248 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 4249 PetscFunctionReturn(0); 4250 } 4251 4252 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 4253 ci[0] = 0; 4254 for (i=0; i<am; i++) { 4255 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4256 } 4257 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 4258 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 4259 k = 0; 4260 for (i=0; i<am; i++) { 4261 ncols_o = bi[i+1] - bi[i]; 4262 ncols_d = ai[i+1] - ai[i]; 4263 /* off-diagonal portion of A */ 4264 for (jo=0; jo<ncols_o; jo++) { 4265 col = cmap[*bj]; 4266 if (col >= cstart) break; 4267 cj[k] = col; bj++; 4268 ca[k++] = *ba++; 4269 } 4270 /* diagonal portion of A */ 4271 for (j=0; j<ncols_d; j++) { 4272 cj[k] = cstart + *aj++; 4273 ca[k++] = *aa++; 4274 } 4275 /* off-diagonal portion of A */ 4276 for (j=jo; j<ncols_o; j++) { 4277 cj[k] = cmap[*bj++]; 4278 ca[k++] = *ba++; 4279 } 4280 } 4281 /* put together the new matrix */ 4282 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4283 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4284 /* Since these are PETSc arrays, change flags to free them as necessary. 
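Setting free_a and free_ij below instructs MatDestroy() to free ci, cj, and ca together with the matrix.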
*/ 4285 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4286 mat->free_a = PETSC_TRUE; 4287 mat->free_ij = PETSC_TRUE; 4288 mat->nonew = 0; 4289 } else if (scall == MAT_REUSE_MATRIX) { 4290 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4291 ci = mat->i; cj = mat->j; cam = mat->a; 4292 for (i=0; i<am; i++) { 4293 /* off-diagonal portion of A */ 4294 ncols_o = bi[i+1] - bi[i]; 4295 for (jo=0; jo<ncols_o; jo++) { 4296 col = cmap[*bj]; 4297 if (col >= cstart) break; 4298 *cam++ = *ba++; bj++; 4299 } 4300 /* diagonal portion of A */ 4301 ncols_d = ai[i+1] - ai[i]; 4302 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4303 /* off-diagonal portion of A */ 4304 for (j=jo; j<ncols_o; j++) { 4305 *cam++ = *ba++; bj++; 4306 } 4307 } 4308 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4309 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4310 PetscFunctionReturn(0); 4311 } 4312 4313 /*@C 4314 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 4315 4316 Not Collective 4317 4318 Input Parameters: 4319 + A - the matrix 4320 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4321 - row, col - index sets of rows and columns to extract (or NULL) 4322 4323 Output Parameter: 4324 . A_loc - the local sequential matrix generated 4325 4326 Level: developer 4327 4328 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 4329 4330 @*/ 4331 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4332 { 4333 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4334 PetscErrorCode ierr; 4335 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4336 IS isrowa,iscola; 4337 Mat *aloc; 4338 PetscBool match; 4339 4340 PetscFunctionBegin; 4341 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4342 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4343 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4344 if (!row) { 4345 start = A->rmap->rstart; end = A->rmap->rend; 4346 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4347 } else { 4348 isrowa = *row; 4349 } 4350 if (!col) { 4351 start = A->cmap->rstart; 4352 cmap = a->garray; 4353 nzA = a->A->cmap->n; 4354 nzB = a->B->cmap->n; 4355 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 4356 ncols = 0; 4357 for (i=0; i<nzB; i++) { 4358 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4359 else break; 4360 } 4361 imark = i; 4362 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4363 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4364 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 4365 } else { 4366 iscola = *col; 4367 } 4368 if (scall != MAT_INITIAL_MATRIX) { 4369 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 4370 aloc[0] = *A_loc; 4371 } 4372 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4373 *A_loc = aloc[0]; 4374 ierr = PetscFree(aloc);CHKERRQ(ierr); 4375 if (!row) { 4376 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 4377 } 4378 if (!col) { 4379 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 4380 } 4381 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4382 PetscFunctionReturn(0); 4383 } 4384 4385 /*@C 4386 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4387 4388 Collective on Mat 4389 4390 Input Parameters: 4391 + A,B - the matrices in mpiaij format 4392 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4393 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 4394 4395 Output Parameter: 4396 + rowb, colb - index sets of rows and columns of B to extract 4397 - B_seq - the sequential matrix generated 4398 4399 Level: developer 4400 4401 @*/ 4402 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 4403 { 4404 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4405 PetscErrorCode ierr; 4406 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4407 IS isrowb,iscolb; 4408 Mat *bseq=NULL; 4409 4410 PetscFunctionBegin; 4411 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 4412 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4413 } 4414 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4415 4416 if (scall == MAT_INITIAL_MATRIX) { 4417 start = A->cmap->rstart; 4418 cmap = a->garray; 4419 nzA = a->A->cmap->n; 4420 nzB = a->B->cmap->n; 4421 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 4422 ncols = 0; 4423 for (i=0; i<nzB; i++) { /* row < local row index */ 4424 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4425 else break; 4426 } 4427 imark = i; 4428 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4429 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4430 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 4431 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4432 } else { 4433 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4434 isrowb = *rowb; iscolb = *colb; 4435 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 4436 bseq[0] = *B_seq; 4437 } 4438 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4439 *B_seq = bseq[0]; 4440 ierr = PetscFree(bseq);CHKERRQ(ierr); 4441 if (!rowb) { 4442 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 4443 } else { 4444 *rowb = isrowb; 4445 } 4446 if (!colb) { 4447 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 4448 } else { 4449 *colb = iscolb; 4450 } 4451 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4452 PetscFunctionReturn(0); 4453 } 4454 4455 /* 4456 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4457 of the OFF-DIAGONAL portion of local A 4458 4459 Collective on Mat 4460 4461 Input Parameters: 4462 + A,B - the matrices in mpiaij format 4463 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4464 4465 Output Parameter: 4466 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 4467 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 4468 . 
/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameters:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
-    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

    Level: developer

*/
PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
{
  VecScatter_MPI_General *gen_to,*gen_from;
  PetscErrorCode         ierr;
  Mat_MPIAIJ             *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ             *b_oth;
  VecScatter             ctx = a->Mvctx;
  MPI_Comm               comm;
  PetscMPIInt            *rprocs,*sprocs,tag = ((PetscObject)ctx)->tag,rank;
  PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn = a->B->cmap->n,row,*b_othi,*b_othj;
  PetscScalar            *rvalues,*svalues;
  MatScalar              *b_otha,*bufa,*bufA;
  PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
  MPI_Request            *rwaits = NULL,*swaits = NULL;
  MPI_Status             *sstatus,rstatus;
  PetscMPIInt            jj,size;
  PetscInt               *cols,sbs,rbs;
  PetscScalar            *vals;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  gen_to   = (VecScatter_MPI_General*)ctx->todata;
  gen_from = (VecScatter_MPI_General*)ctx->fromdata;
  rvalues  = gen_from->values; /* holds the length of receiving row */
  svalues  = gen_to->values;   /* holds the length of sending row */
  nrecvs   = gen_from->n;
  nsends   = gen_to->n;

  ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
  srow    = gen_to->indices; /* local row index to be sent */
  sstarts = gen_to->starts;
  sprocs  = gen_to->procs;
  sstatus = gen_to->sstatus;
  sbs     = gen_to->bs;
  rstarts = gen_from->starts;
  rprocs  = gen_from->procs;
  rbs     = gen_from->bs;

  if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX) {
    /* i-array */
    /*---------*/
    /* post receives */
    for (i=0; i<nrecvs; i++) {
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
      ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message */
    ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);

    sstartsj[0] = 0;
    rstartsj[0] = 0;
    len         = 0; /* total length of j or a array to be sent */
    k           = 0;
    for (i=0; i<nsends; i++) {
      rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
      nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l=0; l<sbs; l++) {
          ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */

          rowlen[j*sbs+l] = ncols;

          len += ncols;
          ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
        }
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);

      sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);

    b_othi[0] = 0;
    len       = 0; /* total length of j or a array to be received */
    k         = 0;
    for (i=0; i<nrecvs; i++) {
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
        k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++) {
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++) {
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++) {
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++) {
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX) {
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++) {
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++) {
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++) {
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++) {
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj_s || !bufa_ptr) {
      ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
    } else {
      *startsj_s = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
  MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

  Not Collective

  Input Parameter:
. A - The matrix in mpiaij format

  Output Parameters:
+ lvec - The local vector holding off-process values from the argument to a matrix-vector product
. colmap - A map from global column index to local index into lvec
- multScatter - A scatter from the argument of a matrix-vector product to lvec

  Level: developer

@*/
#if defined(PETSC_USE_CTABLE)
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ*) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}
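
/*
   Usage sketch (illustrative only): a caller that wants read-only access to the communication objects that
   MatMult() uses for a MATMPIAIJ matrix A could write something like the following; colmap is a PetscTable
   only when PETSc was configured with ctable support, and all returned objects remain owned by A.

     Vec        lvec;
     VecScatter mvctx;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&mvctx);CHKERRQ(ierr);
     ... inspect lvec, colmap, and mvctx; do not destroy them ...
*/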
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif
#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*);

/*
    Computes (B'*A')' since computing B*A directly is untenable

               n                       p                          p
        (              )       (              )         (                  )
      m (      A       )  *  n (      B       )   =   m (         C        )
        (              )       (              )         (                  )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatDestroy(&Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,n = B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;

  *C = Cmat;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateAIJ()
M*/
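
/*
   Usage sketch (illustrative only): applications normally obtain this type through MatSetType() or the
   options database rather than by calling MatCreate_MPIAIJ() directly; M and N below stand for
   application-chosen global sizes and the preallocation numbers are placeholders.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);                       or MatSetFromOptions() with -mat_type mpiaij
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/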
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);

  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSP/CUSPARSE classes */
  b->spptr = NULL;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr); 4870 #endif 4871 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 4872 PetscFunctionReturn(0); 4873 } 4874 4875 /*@C 4876 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 4877 and "off-diagonal" part of the matrix in CSR format. 4878 4879 Collective on MPI_Comm 4880 4881 Input Parameters: 4882 + comm - MPI communicator 4883 . m - number of local rows (Cannot be PETSC_DECIDE) 4884 . n - This value should be the same as the local size used in creating the 4885 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4886 calculated if N is given) For square matrices n is almost always m. 4887 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4888 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4889 . i - row indices for "diagonal" portion of matrix 4890 . j - column indices 4891 . a - matrix values 4892 . oi - row indices for "off-diagonal" portion of matrix 4893 . oj - column indices 4894 - oa - matrix values 4895 4896 Output Parameter: 4897 . mat - the matrix 4898 4899 Level: advanced 4900 4901 Notes: 4902 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 4903 must free the arrays once the matrix has been destroyed and not before. 4904 4905 The i and j indices are 0 based 4906 4907 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 4908 4909 This sets local rows and cannot be used to set off-processor values. 4910 4911 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 4912 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 4913 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 4914 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 4915 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 4916 communication if it is known that only local entries will be set. 

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
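
/*
   Usage sketch (illustrative only, under the assumption of a two-process run where each process owns
   two rows and two columns): rank 0 could describe its local part

        [ 1 0 | 0 3 ]
        [ 0 2 | 0 0 ]

   (diagonal block on the left in local column numbering, off-diagonal block on the right in global
   column numbering) with

     PetscInt    i[]  = {0,1,2}, j[]  = {0,1};   diagonal block, local column indices
     PetscScalar a[]  = {1.0,2.0};
     PetscInt    oi[] = {0,1,1}, oj[] = {3};     off-diagonal block, global column indices
     PetscScalar oa[] = {3.0};
     Mat         A;
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);

   The call is collective, so rank 1 passes its own arrays, and all arrays must outlive A as noted above.
*/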

/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some variables required in the macro */
    Mat        A      = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa    = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B      = aij->B;
    Mat_SeqAIJ *b     = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba    = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                ba    = b->a;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}