#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc-private/vecimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically
   switches over to using inodes when enough of them exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
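/*
   Illustrative usage sketch (added for exposition, not part of the original source): following the
   recommendation in the man pages above, call both preallocation routines so the same code works
   whether the communicator has one process (the matrix is MATSEQAIJ) or several (MATMPIAIJ).
   The counts 5 and 2 are placeholder per-row nonzero estimates; comm, M, and N are assumed to be
   defined by the caller.

      Mat A;
      MatCreate(comm,&A);
      MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
      MatSetType(A,MATAIJ);
      MatSeqAIJSetPreallocation(A,5,NULL);            (used on a single-process communicator)
      MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);     (used on a multi-process communicator)
*/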
#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt = 0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalSet_MPIAIJ"
PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*) Y->data;

  PetscFunctionBegin;
  if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }

  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else { /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each process
  has an order-N integer array) but it is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
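/*
  Worked example (added for exposition): if garray = {3,7,12}, then global column 7 is local
  column 1 of the off-diagonal block B.  Without PETSC_USE_CTABLE the dense array stores
  colmap[3]=1, colmap[7]=2, colmap[12]=3 (values shifted by one so that 0 can mean "column not
  present locally"); with PETSC_USE_CTABLE the table stores the pairs (garray[i]+1 -> i+1),
  keys shifted as well so that 0 is never used as a key.  Lookups subtract the 1 back off.
*/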
/* Insert a value into the sorted (diagonal-block) row: a bounded binary search, narrowed by the
   column of the previous insertion, followed by an in-place shift when a new nonzero is created */
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
  { \
    if (col <= lastcol1)  low1 = 0; \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++;\
    a_noinsert: ; \
    ailen[row] = nrow1; \
  }

/* Same as above, but for the off-diagonal block B */
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
  { \
    if (col <= lastcol2)  low2 = 0; \
    else                 high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
  }

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A                 = aij->A;
  Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa               = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B                 = aij->B;
  Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba               = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERTMODE or ADDMODE; the bitwise OR of the
     modes across ranks equals (ADD_VALUES|INSERT_VALUES) exactly when the ranks disagree */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
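/*
  Illustrative usage sketch (added for exposition): the two routines above implement the standard
  PETSc assembly pattern.  Off-process entries set by MatSetValues_MPIAIJ() are stashed and only
  communicated and applied during the Begin/End pair, so user code looks like:

      MatSetValues(mat,1,&row,ncols,cols,vals,ADD_VALUES);     (any rank may set any row)
      MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
*/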
#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ *) A->data;
  PetscInt       *owners = A->rmap->range;
  PetscInt       n = A->rmap->n;
  PetscSF        sf;
  PetscInt       *lrows;
  PetscSFNode    *rrows;
  PetscInt       r, p = 0, len = 0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    if (A->nooffproczerorows) {
      if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
      lrows[len++] = idx - owners[p];
    } else {
      rrows[r].rank  = p;
      rrows[r].index = rows[r] - owners[p];
    }
  }
  if (!A->nooffproczerorows) {
    ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
    /* Collect flags for rows to be zeroed */
    ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
    /* Compress and put in row numbers */
    for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  }
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }
  /* Must zero mat->B before mat->A because the (diag) case below may put values into mat->B */
  ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
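/*
  Illustrative usage sketch (added for exposition): as the code above shows, when the optional
  vectors are supplied the right-hand side is adjusted so that b[row] = diag*x[row] for each
  zeroed row, which is the usual way of imposing Dirichlet boundary conditions after assembly;
  rows holds the (global) boundary row indices and x the boundary values:

      MatZeroRows(A,nrows,rows,1.0,x,b);
*/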
#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  /* overlap the communication of off-process entries of xx with the local (diagonal-block) product */
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_Redundant"
PetscErrorCode MatDestroy_Redundant(Mat_Redundant **redundant)
{
  PetscErrorCode ierr;
  Mat_Redundant  *redund = *redundant;
  PetscInt       i;

  PetscFunctionBegin;
  *redundant = NULL;
  if (redund){
    if (redund->matseq) { /* via MatGetSubMatrices() */
      ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
      ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
      ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
      ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
    } else {
      ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
      ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
      ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
      for (i=0; i<redund->nrecvs; i++) {
        ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
        ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
      }
      ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
    }

    if (redund->psubcomm) {
      ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
    }
    ierr = PetscFree(redund);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatDestroy_Redundant(&mat->redundant);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the largest process needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);
  /* load up the local column values */
  ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
  PetscFunctionReturn(0);
}
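/*
  Note (added for exposition): as the routine above writes it, the binary layout is
  header[4] = {MAT_FILE_CLASSID, M, N, global nnz}, followed by all row lengths, then all global
  column indices (row by row, with each row's diagonal-block entries placed between its lower and
  upper off-diagonal entries), then the values in the matching order.
*/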
#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
*/ 1402 Mat A; 1403 Mat_SeqAIJ *Aloc; 1404 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1405 MatScalar *a; 1406 1407 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1408 if (!rank) { 1409 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1410 } else { 1411 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1412 } 1413 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1414 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1415 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1416 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1417 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1418 1419 /* copy over the A part */ 1420 Aloc = (Mat_SeqAIJ*)aij->A->data; 1421 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1422 row = mat->rmap->rstart; 1423 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1424 for (i=0; i<m; i++) { 1425 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1426 row++; 1427 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1428 } 1429 aj = Aloc->j; 1430 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1431 1432 /* copy over the B part */ 1433 Aloc = (Mat_SeqAIJ*)aij->B->data; 1434 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1435 row = mat->rmap->rstart; 1436 ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr); 1437 ct = cols; 1438 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1439 for (i=0; i<m; i++) { 1440 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1441 row++; 1442 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1443 } 1444 ierr = PetscFree(ct);CHKERRQ(ierr); 1445 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1446 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1447 /* 1448 Everyone has to call to draw the matrix since the graphics waits are 1449 synchronized across all processors that share the PetscDraw object 1450 */ 1451 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1452 if (!rank) { 1453 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1454 } 1455 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1456 ierr = MatDestroy(&A);CHKERRQ(ierr); 1457 } 1458 PetscFunctionReturn(0); 1459 } 1460 1461 #undef __FUNCT__ 1462 #define __FUNCT__ "MatView_MPIAIJ" 1463 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1464 { 1465 PetscErrorCode ierr; 1466 PetscBool iascii,isdraw,issocket,isbinary; 1467 1468 PetscFunctionBegin; 1469 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1470 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1471 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1472 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1473 if (iascii || isdraw || isbinary || issocket) { 1474 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1475 } 1476 PetscFunctionReturn(0); 1477 } 1478 1479 #undef __FUNCT__ 1480 #define __FUNCT__ "MatSOR_MPIAIJ" 1481 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1482 { 1483 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1484 PetscErrorCode ierr; 1485 Vec bb1 = 0; 1486 PetscBool hasop; 1487 1488 PetscFunctionBegin; 1489 if (flag == SOR_APPLY_UPPER) { 1490 ierr 
= (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1491 PetscFunctionReturn(0); 1492 } 1493 1494 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1495 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1496 } 1497 1498 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1499 if (flag & SOR_ZERO_INITIAL_GUESS) { 1500 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1501 its--; 1502 } 1503 1504 while (its--) { 1505 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1506 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1507 1508 /* update rhs: bb1 = bb - B*x */ 1509 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1510 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1511 1512 /* local sweep */ 1513 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1514 } 1515 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1516 if (flag & SOR_ZERO_INITIAL_GUESS) { 1517 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1518 its--; 1519 } 1520 while (its--) { 1521 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1522 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1523 1524 /* update rhs: bb1 = bb - B*x */ 1525 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1526 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1527 1528 /* local sweep */ 1529 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1530 } 1531 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1532 if (flag & SOR_ZERO_INITIAL_GUESS) { 1533 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1534 its--; 1535 } 1536 while (its--) { 1537 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1538 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1539 1540 /* update rhs: bb1 = bb - B*x */ 1541 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1542 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1543 1544 /* local sweep */ 1545 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1546 } 1547 } else if (flag & SOR_EISENSTAT) { 1548 Vec xx1; 1549 1550 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1551 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1552 1553 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1554 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1555 if (!mat->diag) { 1556 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1557 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1558 } 1559 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1560 if (hasop) { 1561 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1562 } else { 1563 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1564 } 1565 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1566 1567 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1568 1569 /* local sweep */ 1570 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | 
SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1571 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1572 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1573 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1574 1575 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1576 PetscFunctionReturn(0); 1577 } 1578 1579 #undef __FUNCT__ 1580 #define __FUNCT__ "MatPermute_MPIAIJ" 1581 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1582 { 1583 Mat aA,aB,Aperm; 1584 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1585 PetscScalar *aa,*ba; 1586 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1587 PetscSF rowsf,sf; 1588 IS parcolp = NULL; 1589 PetscBool done; 1590 PetscErrorCode ierr; 1591 1592 PetscFunctionBegin; 1593 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1594 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1595 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1596 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1597 1598 /* Invert row permutation to find out where my rows should go */ 1599 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1600 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1601 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1602 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1603 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1604 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1605 1606 /* Invert column permutation to find out where my columns should go */ 1607 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1608 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1609 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1610 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1611 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1612 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1613 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1614 1615 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1616 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1617 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1618 1619 /* Find out where my gcols should go */ 1620 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1621 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1622 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1623 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1624 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1625 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1626 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1627 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1628 1629 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1630 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1631 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1632 for (i=0; i<m; i++) { 1633 PetscInt row = rdest[i],rowner; 1634 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1635 for (j=ai[i]; j<ai[i+1]; j++) { 1636 PetscInt cowner,col = cdest[aj[j]]; 1637 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1638 if (rowner == cowner) dnnz[i]++; 1639 
else onnz[i]++; 1640 } 1641 for (j=bi[i]; j<bi[i+1]; j++) { 1642 PetscInt cowner,col = gcdest[bj[j]]; 1643 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1644 if (rowner == cowner) dnnz[i]++; 1645 else onnz[i]++; 1646 } 1647 } 1648 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1649 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1650 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1651 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1652 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1653 1654 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1655 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1656 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1657 for (i=0; i<m; i++) { 1658 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1659 PetscInt j0,rowlen; 1660 rowlen = ai[i+1] - ai[i]; 1661 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1662 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1663 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1664 } 1665 rowlen = bi[i+1] - bi[i]; 1666 for (j0=j=0; j<rowlen; j0=j) { 1667 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1668 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1669 } 1670 } 1671 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1672 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1673 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1674 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1675 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1676 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1677 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1678 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1679 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1680 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1681 *B = Aperm; 1682 PetscFunctionReturn(0); 1683 } 1684 1685 #undef __FUNCT__ 1686 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1687 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1688 { 1689 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1690 Mat A = mat->A,B = mat->B; 1691 PetscErrorCode ierr; 1692 PetscReal isend[5],irecv[5]; 1693 1694 PetscFunctionBegin; 1695 info->block_size = 1.0; 1696 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1697 1698 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1699 isend[3] = info->memory; isend[4] = info->mallocs; 1700 1701 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1702 1703 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1704 isend[3] += info->memory; isend[4] += info->mallocs; 1705 if (flag == MAT_LOCAL) { 1706 info->nz_used = isend[0]; 1707 info->nz_allocated = isend[1]; 1708 info->nz_unneeded = isend[2]; 1709 info->memory = isend[3]; 1710 info->mallocs = isend[4]; 1711 } else if (flag == MAT_GLOBAL_MAX) { 1712 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1713 1714 info->nz_used = irecv[0]; 1715 info->nz_allocated = irecv[1]; 1716 info->nz_unneeded = irecv[2]; 1717 info->memory = irecv[3]; 1718 info->mallocs = irecv[4]; 1719 } else 
if (flag == MAT_GLOBAL_SUM) { 1720 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1721 1722 info->nz_used = irecv[0]; 1723 info->nz_allocated = irecv[1]; 1724 info->nz_unneeded = irecv[2]; 1725 info->memory = irecv[3]; 1726 info->mallocs = irecv[4]; 1727 } 1728 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1729 info->fill_ratio_needed = 0; 1730 info->factor_mallocs = 0; 1731 PetscFunctionReturn(0); 1732 } 1733 1734 #undef __FUNCT__ 1735 #define __FUNCT__ "MatSetOption_MPIAIJ" 1736 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1737 { 1738 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1739 PetscErrorCode ierr; 1740 1741 PetscFunctionBegin; 1742 switch (op) { 1743 case MAT_NEW_NONZERO_LOCATIONS: 1744 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1745 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1746 case MAT_KEEP_NONZERO_PATTERN: 1747 case MAT_NEW_NONZERO_LOCATION_ERR: 1748 case MAT_USE_INODES: 1749 case MAT_IGNORE_ZERO_ENTRIES: 1750 MatCheckPreallocated(A,1); 1751 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1752 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1753 break; 1754 case MAT_ROW_ORIENTED: 1755 a->roworiented = flg; 1756 1757 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1758 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1759 break; 1760 case MAT_NEW_DIAGONALS: 1761 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1762 break; 1763 case MAT_IGNORE_OFF_PROC_ENTRIES: 1764 a->donotstash = flg; 1765 break; 1766 case MAT_SPD: 1767 A->spd_set = PETSC_TRUE; 1768 A->spd = flg; 1769 if (flg) { 1770 A->symmetric = PETSC_TRUE; 1771 A->structurally_symmetric = PETSC_TRUE; 1772 A->symmetric_set = PETSC_TRUE; 1773 A->structurally_symmetric_set = PETSC_TRUE; 1774 } 1775 break; 1776 case MAT_SYMMETRIC: 1777 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1778 break; 1779 case MAT_STRUCTURALLY_SYMMETRIC: 1780 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1781 break; 1782 case MAT_HERMITIAN: 1783 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1784 break; 1785 case MAT_SYMMETRY_ETERNAL: 1786 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1787 break; 1788 default: 1789 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1790 } 1791 PetscFunctionReturn(0); 1792 } 1793 1794 #undef __FUNCT__ 1795 #define __FUNCT__ "MatGetRow_MPIAIJ" 1796 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1797 { 1798 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1799 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1800 PetscErrorCode ierr; 1801 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1802 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1803 PetscInt *cmap,*idx_p; 1804 1805 PetscFunctionBegin; 1806 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1807 mat->getrowactive = PETSC_TRUE; 1808 1809 if (!mat->rowvalues && (idx || v)) { 1810 /* 1811 allocate enough space to hold information from the longest row. 
1812 */ 1813 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1814 PetscInt max = 1,tmp; 1815 for (i=0; i<matin->rmap->n; i++) { 1816 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1817 if (max < tmp) max = tmp; 1818 } 1819 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1820 } 1821 1822 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1823 lrow = row - rstart; 1824 1825 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1826 if (!v) {pvA = 0; pvB = 0;} 1827 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1828 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1829 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1830 nztot = nzA + nzB; 1831 1832 cmap = mat->garray; 1833 if (v || idx) { 1834 if (nztot) { 1835 /* Sort by increasing column numbers, assuming A and B already sorted */ 1836 PetscInt imark = -1; 1837 if (v) { 1838 *v = v_p = mat->rowvalues; 1839 for (i=0; i<nzB; i++) { 1840 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1841 else break; 1842 } 1843 imark = i; 1844 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1845 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1846 } 1847 if (idx) { 1848 *idx = idx_p = mat->rowindices; 1849 if (imark > -1) { 1850 for (i=0; i<imark; i++) { 1851 idx_p[i] = cmap[cworkB[i]]; 1852 } 1853 } else { 1854 for (i=0; i<nzB; i++) { 1855 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1856 else break; 1857 } 1858 imark = i; 1859 } 1860 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1861 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1862 } 1863 } else { 1864 if (idx) *idx = 0; 1865 if (v) *v = 0; 1866 } 1867 } 1868 *nz = nztot; 1869 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1870 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1871 PetscFunctionReturn(0); 1872 } 1873 1874 #undef __FUNCT__ 1875 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1876 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1877 { 1878 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1879 1880 PetscFunctionBegin; 1881 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1882 aij->getrowactive = PETSC_FALSE; 1883 PetscFunctionReturn(0); 1884 } 1885 1886 #undef __FUNCT__ 1887 #define __FUNCT__ "MatNorm_MPIAIJ" 1888 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1889 { 1890 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1891 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1892 PetscErrorCode ierr; 1893 PetscInt i,j,cstart = mat->cmap->rstart; 1894 PetscReal sum = 0.0; 1895 MatScalar *v; 1896 1897 PetscFunctionBegin; 1898 if (aij->size == 1) { 1899 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1900 } else { 1901 if (type == NORM_FROBENIUS) { 1902 v = amat->a; 1903 for (i=0; i<amat->nz; i++) { 1904 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1905 } 1906 v = bmat->a; 1907 for (i=0; i<bmat->nz; i++) { 1908 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1909 } 1910 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1911 *norm = PetscSqrtReal(*norm); 1912 } else if (type == NORM_1) { /* max column norm */ 1913 PetscReal *tmp,*tmp2; 1914 PetscInt *jj,*garray = aij->garray; 1915 ierr = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr); 1916 ierr = 
PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr); 1917 *norm = 0.0; 1918 v = amat->a; jj = amat->j; 1919 for (j=0; j<amat->nz; j++) { 1920 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1921 } 1922 v = bmat->a; jj = bmat->j; 1923 for (j=0; j<bmat->nz; j++) { 1924 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1925 } 1926 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1927 for (j=0; j<mat->cmap->N; j++) { 1928 if (tmp2[j] > *norm) *norm = tmp2[j]; 1929 } 1930 ierr = PetscFree(tmp);CHKERRQ(ierr); 1931 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1932 } else if (type == NORM_INFINITY) { /* max row norm */ 1933 PetscReal ntemp = 0.0; 1934 for (j=0; j<aij->A->rmap->n; j++) { 1935 v = amat->a + amat->i[j]; 1936 sum = 0.0; 1937 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1938 sum += PetscAbsScalar(*v); v++; 1939 } 1940 v = bmat->a + bmat->i[j]; 1941 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1942 sum += PetscAbsScalar(*v); v++; 1943 } 1944 if (sum > ntemp) ntemp = sum; 1945 } 1946 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1947 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1948 } 1949 PetscFunctionReturn(0); 1950 } 1951 1952 #undef __FUNCT__ 1953 #define __FUNCT__ "MatTranspose_MPIAIJ" 1954 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1955 { 1956 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1957 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1958 PetscErrorCode ierr; 1959 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1960 PetscInt cstart = A->cmap->rstart,ncol; 1961 Mat B; 1962 MatScalar *array; 1963 1964 PetscFunctionBegin; 1965 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1966 1967 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1968 ai = Aloc->i; aj = Aloc->j; 1969 bi = Bloc->i; bj = Bloc->j; 1970 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1971 PetscInt *d_nnz,*g_nnz,*o_nnz; 1972 PetscSFNode *oloc; 1973 PETSC_UNUSED PetscSF sf; 1974 1975 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1976 /* compute d_nnz for preallocation */ 1977 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1978 for (i=0; i<ai[ma]; i++) { 1979 d_nnz[aj[i]]++; 1980 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1981 } 1982 /* compute local off-diagonal contributions */ 1983 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1984 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1985 /* map those to global */ 1986 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1987 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1988 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1989 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1990 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1991 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1992 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1993 1994 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1995 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1996 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1997 ierr = 
MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1998 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1999 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2000 } else { 2001 B = *matout; 2002 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2003 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2004 } 2005 2006 /* copy over the A part */ 2007 array = Aloc->a; 2008 row = A->rmap->rstart; 2009 for (i=0; i<ma; i++) { 2010 ncol = ai[i+1]-ai[i]; 2011 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2012 row++; 2013 array += ncol; aj += ncol; 2014 } 2015 aj = Aloc->j; 2016 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2017 2018 /* copy over the B part */ 2019 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2020 array = Bloc->a; 2021 row = A->rmap->rstart; 2022 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2023 cols_tmp = cols; 2024 for (i=0; i<mb; i++) { 2025 ncol = bi[i+1]-bi[i]; 2026 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2027 row++; 2028 array += ncol; cols_tmp += ncol; 2029 } 2030 ierr = PetscFree(cols);CHKERRQ(ierr); 2031 2032 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2033 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2034 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2035 *matout = B; 2036 } else { 2037 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2038 } 2039 PetscFunctionReturn(0); 2040 } 2041 2042 #undef __FUNCT__ 2043 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2044 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2045 { 2046 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2047 Mat a = aij->A,b = aij->B; 2048 PetscErrorCode ierr; 2049 PetscInt s1,s2,s3; 2050 2051 PetscFunctionBegin; 2052 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2053 if (rr) { 2054 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2055 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2056 /* Overlap communication with computation. 
*/ 2057 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2058 } 2059 if (ll) { 2060 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2061 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2062 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2063 } 2064 /* scale the diagonal block */ 2065 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2066 2067 if (rr) { 2068 /* Do a scatter end and then right scale the off-diagonal block */ 2069 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2070 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2071 } 2072 PetscFunctionReturn(0); 2073 } 2074 2075 #undef __FUNCT__ 2076 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2077 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2078 { 2079 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2080 PetscErrorCode ierr; 2081 2082 PetscFunctionBegin; 2083 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2084 PetscFunctionReturn(0); 2085 } 2086 2087 #undef __FUNCT__ 2088 #define __FUNCT__ "MatEqual_MPIAIJ" 2089 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2090 { 2091 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2092 Mat a,b,c,d; 2093 PetscBool flg; 2094 PetscErrorCode ierr; 2095 2096 PetscFunctionBegin; 2097 a = matA->A; b = matA->B; 2098 c = matB->A; d = matB->B; 2099 2100 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2101 if (flg) { 2102 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2103 } 2104 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2105 PetscFunctionReturn(0); 2106 } 2107 2108 #undef __FUNCT__ 2109 #define __FUNCT__ "MatCopy_MPIAIJ" 2110 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2111 { 2112 PetscErrorCode ierr; 2113 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2114 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2115 2116 PetscFunctionBegin; 2117 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2118 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2119 /* because of the column compression in the off-processor part of the matrix a->B, 2120 the number of columns in a->B and b->B may be different, hence we cannot call 2121 the MatCopy() directly on the two parts. If need be, we can provide a more 2122 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2123 then copying the submatrices */ 2124 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2125 } else { 2126 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2127 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2128 } 2129 PetscFunctionReturn(0); 2130 } 2131 2132 #undef __FUNCT__ 2133 #define __FUNCT__ "MatSetUp_MPIAIJ" 2134 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2135 { 2136 PetscErrorCode ierr; 2137 2138 PetscFunctionBegin; 2139 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2140 PetscFunctionReturn(0); 2141 } 2142 2143 /* 2144 Computes the number of nonzeros per row needed for preallocation when X and Y 2145 have different nonzero structure. 
2146 */ 2147 #undef __FUNCT__ 2148 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private" 2149 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2150 { 2151 PetscInt i,j,k,nzx,nzy; 2152 2153 PetscFunctionBegin; 2154 /* Set the number of nonzeros in the new matrix */ 2155 for (i=0; i<m; i++) { 2156 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2157 nzx = xi[i+1] - xi[i]; 2158 nzy = yi[i+1] - yi[i]; 2159 nnz[i] = 0; 2160 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2161 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2162 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2163 nnz[i]++; 2164 } 2165 for (; k<nzy; k++) nnz[i]++; 2166 } 2167 PetscFunctionReturn(0); 2168 } 2169 2170 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2171 #undef __FUNCT__ 2172 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2173 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2174 { 2175 PetscErrorCode ierr; 2176 PetscInt m = Y->rmap->N; 2177 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2178 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2179 2180 PetscFunctionBegin; 2181 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2182 PetscFunctionReturn(0); 2183 } 2184 2185 #undef __FUNCT__ 2186 #define __FUNCT__ "MatAXPY_MPIAIJ" 2187 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2188 { 2189 PetscErrorCode ierr; 2190 PetscInt i; 2191 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2192 PetscBLASInt bnz,one=1; 2193 Mat_SeqAIJ *x,*y; 2194 2195 PetscFunctionBegin; 2196 if (str == SAME_NONZERO_PATTERN) { 2197 PetscScalar alpha = a; 2198 x = (Mat_SeqAIJ*)xx->A->data; 2199 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2200 y = (Mat_SeqAIJ*)yy->A->data; 2201 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2202 x = (Mat_SeqAIJ*)xx->B->data; 2203 y = (Mat_SeqAIJ*)yy->B->data; 2204 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2205 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2206 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2207 } else if (str == SUBSET_NONZERO_PATTERN) { 2208 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2209 2210 x = (Mat_SeqAIJ*)xx->B->data; 2211 y = (Mat_SeqAIJ*)yy->B->data; 2212 if (y->xtoy && y->XtoY != xx->B) { 2213 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2214 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2215 } 2216 if (!y->xtoy) { /* get xtoy */ 2217 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2218 y->XtoY = xx->B; 2219 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2220 } 2221 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2222 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2223 } else { 2224 Mat B; 2225 PetscInt *nnz_d,*nnz_o; 2226 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2227 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2228 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2229 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2230 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2231 
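/* Note: the nonzero patterns of Y and X are unrelated in this branch, so a fresh
   matrix B is preallocated for the merged pattern row by row: nnz_d merges the
   two diagonal blocks directly, while nnz_o must merge the off-diagonal blocks
   through their garray local-to-global maps because the compressed column
   spaces of yy->B and xx->B generally differ. B is filled by
   MatAXPY_BasicWithPreallocation() below, and MatHeaderReplace() then lets Y
   take over B's data in place. */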
ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2232 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2233 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2234 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2235 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2236 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2237 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2238 ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2239 ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2240 }
2241 PetscFunctionReturn(0);
2242 }
2243
2244 extern PetscErrorCode MatConjugate_SeqAIJ(Mat);
2245
2246 #undef __FUNCT__
2247 #define __FUNCT__ "MatConjugate_MPIAIJ"
2248 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2249 {
2250 #if defined(PETSC_USE_COMPLEX)
2251 PetscErrorCode ierr;
2252 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2253
2254 PetscFunctionBegin;
2255 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2256 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2257 #else
2258 PetscFunctionBegin;
2259 #endif
2260 PetscFunctionReturn(0);
2261 }
2262
2263 #undef __FUNCT__
2264 #define __FUNCT__ "MatRealPart_MPIAIJ"
2265 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2266 {
2267 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2268 PetscErrorCode ierr;
2269
2270 PetscFunctionBegin;
2271 ierr = MatRealPart(a->A);CHKERRQ(ierr);
2272 ierr = MatRealPart(a->B);CHKERRQ(ierr);
2273 PetscFunctionReturn(0);
2274 }
2275
2276 #undef __FUNCT__
2277 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2278 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2279 {
2280 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2281 PetscErrorCode ierr;
2282
2283 PetscFunctionBegin;
2284 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2285 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2286 PetscFunctionReturn(0);
2287 }
2288
2289 #if defined(PETSC_HAVE_PBGL)
2290
2291 #include <boost/parallel/mpi/bsp_process_group.hpp>
2292 #include <boost/graph/distributed/ilu_default_graph.hpp>
2293 #include <boost/graph/distributed/ilu_0_block.hpp>
2294 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2295 #include <boost/graph/distributed/petsc/interface.hpp>
2296 #include <boost/multi_array.hpp>
2297 #include <boost/parallel/distributed_property_map.hpp>
2298
2299 #undef __FUNCT__
2300 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2301 /*
2302 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2303 */
2304 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2305 {
2306 namespace petsc = boost::distributed::petsc;
2307
2308 namespace graph_dist = boost::graph::distributed;
2309 using boost::graph::distributed::ilu_default::process_group_type;
2310 using boost::graph::ilu_permuted;
2311
2312 PetscBool row_identity, col_identity;
2313 PetscContainer c;
2314 PetscInt m, n, M, N;
2315 PetscErrorCode ierr;
2316
2317 PetscFunctionBegin;
2318 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2319 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2320 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2321 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2322
2323 process_group_type pg;
2324 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2325 lgraph_type *lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg,
petsc::matrix_distribution(A, pg));
2326 lgraph_type& level_graph = *lgraph_p;
2327 graph_dist::ilu_default::graph_type& graph(level_graph.graph);
2328
2329 petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2330 ilu_permuted(level_graph);
2331
2332 /* put together the new matrix */
2333 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2334 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2335 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2336 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2337 ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
2338 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2339 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2340 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2341
2342 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr);
2343 ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2344 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2345 ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2346 PetscFunctionReturn(0);
2347 }
2348
2349 #undef __FUNCT__
2350 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2351 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2352 {
2353 PetscFunctionBegin;
2354 PetscFunctionReturn(0);
2355 }
2356
2357 #undef __FUNCT__
2358 #define __FUNCT__ "MatSolve_MPIAIJ"
2359 /*
2360 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2361 */
2362 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2363 {
2364 namespace graph_dist = boost::graph::distributed;
2365
2366 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2367 lgraph_type *lgraph_p;
2368 PetscContainer c;
2369 PetscErrorCode ierr;
2370
2371 PetscFunctionBegin;
2372 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2373 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2374 ierr = VecCopy(b, x);CHKERRQ(ierr);
2375
2376 PetscScalar *array_x;
2377 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2378 PetscInt sx;
2379 ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2380
2381 PetscScalar *array_b;
2382 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2383 PetscInt sb;
2384 ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2385
2386 lgraph_type& level_graph = *lgraph_p;
2387 graph_dist::ilu_default::graph_type& graph(level_graph.graph);
2388
2389 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2390 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]);
2391 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]);
2392
2393 typedef boost::iterator_property_map<array_ref_type::iterator,
2394 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type;
2395 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2396 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2397
2398 ilu_set_solve(*lgraph_p, vector_b, vector_x);
ierr = VecRestoreArray(b, &array_b);CHKERRQ(ierr);
ierr = VecRestoreArray(x, &array_x);CHKERRQ(ierr);
2399 PetscFunctionReturn(0);
2400 }
2401 #endif
2402
2403
2404 #undef __FUNCT__
2405 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced"
2406 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2407 {
2408 PetscMPIInt rank,size;
2409 MPI_Comm comm;
2410 PetscErrorCode ierr;
2411 PetscInt nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2412 PetscMPIInt *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2413 PetscInt *rowrange =
mat->rmap->range;
2414 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2415 Mat A = aij->A,B=aij->B,C=*matredundant;
2416 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2417 PetscScalar *sbuf_a;
2418 PetscInt nzlocal=a->nz+b->nz;
2419 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2420 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2421 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2422 MatScalar *aworkA,*aworkB;
2423 PetscScalar *vals;
2424 PetscMPIInt tag1,tag2,tag3,imdex;
2425 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2426 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2427 MPI_Status recv_status,*send_status;
2428 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2429 PetscInt **rbuf_j=NULL;
2430 PetscScalar **rbuf_a=NULL;
2431 Mat_Redundant *redund =NULL;
2432
2433 PetscFunctionBegin;
2434 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2435 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2436 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2437 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2438 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2439
2440 if (reuse == MAT_REUSE_MATRIX) {
2441 if (M != C->rmap->N || N != C->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2442
2443 redund = C->redundant;
2444 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2445
2446 nsends = redund->nsends;
2447 nrecvs = redund->nrecvs;
2448 send_rank = redund->send_rank;
2449 recv_rank = redund->recv_rank;
2450 sbuf_nz = redund->sbuf_nz;
2451 rbuf_nz = redund->rbuf_nz;
2452 sbuf_j = redund->sbuf_j;
2453 sbuf_a = redund->sbuf_a;
2454 rbuf_j = redund->rbuf_j;
2455 rbuf_a = redund->rbuf_a;
2456 }
2457
2458 if (reuse == MAT_INITIAL_MATRIX) {
2459 PetscInt nleftover,np_subcomm;
2460
2461 /* get the destination processors' id send_rank, nsends and nrecvs */
2462 ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr);
2463
2464 np_subcomm = size/nsubcomm;
2465 nleftover = size - nsubcomm*np_subcomm;
2466
2467 /* block of code below is specific for INTERLACED */
2468 /* ------------------------------------------------*/
2469 nsends = 0; nrecvs = 0;
2470 for (i=0; i<size; i++) {
2471 if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2472 send_rank[nsends++] = i;
2473 recv_rank[nrecvs++] = i;
2474 }
2475 }
2476 if (rank >= size - nleftover) { /* this proc is a leftover processor */
2477 i = size-nleftover-1;
2478 j = 0;
2479 while (j < nsubcomm - nleftover) {
2480 send_rank[nsends++] = i;
2481 i--; j++;
2482 }
2483 }
2484
2485 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2486 for (i=0; i<nleftover; i++) {
2487 recv_rank[nrecvs++] = size-nleftover+i;
2488 }
2489 }
2490 /*----------------------------------------------*/
2491
2492 /* allocate sbuf_j, sbuf_a */
2493 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2494 ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr);
2495 ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr);
2496 /*
2497 ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr);
2498 ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr);
2499 */
2500 } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2501
2502 /* copy mat's local entries into the buffers */
2503 if (reuse == MAT_INITIAL_MATRIX) {
2504
rownz_max = 0;
2505 rptr = sbuf_j;
2506 cols = sbuf_j + rend-rstart + 1;
2507 vals = sbuf_a;
2508 rptr[0] = 0;
2509 for (i=0; i<rend-rstart; i++) {
2510 row = i + rstart;
2511 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2512 ncols = nzA + nzB;
2513 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2514 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2515 /* load the column indices for this row into cols */
2516 lwrite = 0;
2517 for (l=0; l<nzB; l++) {
2518 if ((ctmp = bmap[cworkB[l]]) < cstart) {
2519 vals[lwrite] = aworkB[l];
2520 cols[lwrite++] = ctmp;
2521 }
2522 }
2523 for (l=0; l<nzA; l++) {
2524 vals[lwrite] = aworkA[l];
2525 cols[lwrite++] = cstart + cworkA[l];
2526 }
2527 for (l=0; l<nzB; l++) {
2528 if ((ctmp = bmap[cworkB[l]]) >= cend) {
2529 vals[lwrite] = aworkB[l];
2530 cols[lwrite++] = ctmp;
2531 }
2532 }
2533 vals += ncols;
2534 cols += ncols;
2535 rptr[i+1] = rptr[i] + ncols;
2536 if (rownz_max < ncols) rownz_max = ncols;
2537 }
2538 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2539 } else { /* only copy matrix values into sbuf_a */
2540 rptr = sbuf_j;
2541 vals = sbuf_a;
2542 rptr[0] = 0;
2543 for (i=0; i<rend-rstart; i++) {
2544 row = i + rstart;
2545 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2546 ncols = nzA + nzB;
2547 cworkB = b->j + b->i[i];
2548 aworkA = a->a + a->i[i];
2549 aworkB = b->a + b->i[i];
2550 lwrite = 0;
2551 for (l=0; l<nzB; l++) {
2552 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2553 }
2554 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2555 for (l=0; l<nzB; l++) {
2556 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2557 }
2558 vals += ncols;
2559 rptr[i+1] = rptr[i] + ncols;
2560 }
2561 } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2562
2563 /* send nzlocal to others, and recv others' nzlocal */
2564 /*--------------------------------------------------*/
2565 if (reuse == MAT_INITIAL_MATRIX) {
2566 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2567
2568 s_waits2 = s_waits3 + nsends;
2569 s_waits1 = s_waits2 + nsends;
2570 r_waits1 = s_waits1 + nsends;
2571 r_waits2 = r_waits1 + nrecvs;
2572 r_waits3 = r_waits2 + nrecvs;
2573 } else {
2574 ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2575
2576 r_waits3 = s_waits3 + nsends;
2577 }
2578
2579 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2580 if (reuse == MAT_INITIAL_MATRIX) {
2581 /* get new tags to keep the communication clean */
2582 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2583 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2584 ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr);
2585
2586 /* post receives of others' nzlocal */
2587 for (i=0; i<nrecvs; i++) {
2588 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2589 }
2590 /* send nzlocal to others */
2591 for (i=0; i<nsends; i++) {
2592 sbuf_nz[i] = nzlocal;
2593 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2594 }
2595 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2596 count = nrecvs;
2597 while (count) {
2598 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2599
2600 recv_rank[imdex] = recv_status.MPI_SOURCE;
2601 /* allocate rbuf_a and rbuf_j; then post
receives of rbuf_j */
2602 ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr);
2603
2604 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2605
2606 rbuf_nz[imdex] += i + 2;
2607
2608 ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr);
2609 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2610 count--;
2611 }
2612 /* wait on sends of nzlocal */
2613 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2614 /* send mat->i,j to others, and recv from others */
2615 /*------------------------------------------------*/
2616 for (i=0; i<nsends; i++) {
2617 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2618 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2619 }
2620 /* wait on receives of mat->i,j */
2621 /*------------------------------*/
2622 count = nrecvs;
2623 while (count) {
2624 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2625 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2626 count--;
2627 }
2628 /* wait on sends of mat->i,j */
2629 /*---------------------------*/
2630 if (nsends) {
2631 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2632 }
2633 } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2634
2635 /* post receives, send and receive mat->a */
2636 /*----------------------------------------*/
2637 for (imdex=0; imdex<nrecvs; imdex++) {
2638 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2639 }
2640 for (i=0; i<nsends; i++) {
2641 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2642 }
2643 count = nrecvs;
2644 while (count) {
2645 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2646 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2647 count--;
2648 }
2649 if (nsends) {
2650 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2651 }
2652
2653 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2654
2655 /* create redundant matrix */
2656 /*-------------------------*/
2657 if (reuse == MAT_INITIAL_MATRIX) {
2658 const PetscInt *range;
2659 PetscInt rstart_sub,rend_sub,mloc_sub;
2660
2661 /* compute rownz_max for preallocation */
2662 for (imdex=0; imdex<nrecvs; imdex++) {
2663 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2664 rptr = rbuf_j[imdex];
2665 for (i=0; i<j; i++) {
2666 ncols = rptr[i+1] - rptr[i];
2667 if (rownz_max < ncols) rownz_max = ncols;
2668 }
2669 }
2670
2671 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2672
2673 /* get local size of redundant matrix
2674 - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not be efficient!
*/
2675 ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2676 rstart_sub = range[nsubcomm*subrank];
2677 if (subrank+1 < subsize) { /* not the last proc in subcomm */
2678 rend_sub = range[nsubcomm*(subrank+1)];
2679 } else {
2680 rend_sub = mat->rmap->N;
2681 }
2682 mloc_sub = rend_sub - rstart_sub;
2683
2684 if (M == N) {
2685 ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2686 } else { /* non-square matrix */
2687 ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
2688 }
2689 ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr);
2690 ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2691 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2692 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2693 } else {
2694 C = *matredundant;
2695 }
2696
2697 /* insert local matrix entries */
2698 rptr = sbuf_j;
2699 cols = sbuf_j + rend-rstart + 1;
2700 vals = sbuf_a;
2701 for (i=0; i<rend-rstart; i++) {
2702 row = i + rstart;
2703 ncols = rptr[i+1] - rptr[i];
2704 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2705 vals += ncols;
2706 cols += ncols;
2707 }
2708 /* insert received matrix entries */
2709 for (imdex=0; imdex<nrecvs; imdex++) {
2710 rstart = rowrange[recv_rank[imdex]];
2711 rend = rowrange[recv_rank[imdex]+1];
2712 /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
2713 rptr = rbuf_j[imdex];
2714 cols = rbuf_j[imdex] + rend-rstart + 1;
2715 vals = rbuf_a[imdex];
2716 for (i=0; i<rend-rstart; i++) {
2717 row = i + rstart;
2718 ncols = rptr[i+1] - rptr[i];
2719 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2720 vals += ncols;
2721 cols += ncols;
2722 }
2723 }
2724 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2725 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2726
2727 if (reuse == MAT_INITIAL_MATRIX) {
2728 *matredundant = C;
2729
2730 /* create a supporting struct and attach it to C for reuse */
2731 ierr = PetscNewLog(C,&redund);CHKERRQ(ierr);
2732 C->redundant = redund;
2733 redund->nzlocal = nzlocal;
2734 redund->nsends = nsends;
2735 redund->nrecvs = nrecvs;
2736 redund->send_rank = send_rank;
2737 redund->recv_rank = recv_rank;
2738 redund->sbuf_nz = sbuf_nz;
2739 redund->rbuf_nz = rbuf_nz;
2740 redund->sbuf_j = sbuf_j;
2741 redund->sbuf_a = sbuf_a;
2742 redund->rbuf_j = rbuf_j;
2743 redund->rbuf_a = rbuf_a;
2744 redund->psubcomm = NULL;
2745 }
2746 PetscFunctionReturn(0);
2747 }
2748
2749 #undef __FUNCT__
2750 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2751 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2752 {
2753 PetscErrorCode ierr;
2754 MPI_Comm comm;
2755 PetscMPIInt size,subsize;
2756 PetscInt mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N;
2757 Mat_Redundant *redund=NULL;
2758 PetscSubcomm psubcomm=NULL;
2759 MPI_Comm subcomm_in=subcomm;
2760 Mat *matseq;
2761 IS isrow,iscol;
2762
2763 PetscFunctionBegin;
2764 if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */
2765 if (reuse == MAT_INITIAL_MATRIX) {
2766 /* create psubcomm, then get subcomm */
2767 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2768 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2769 if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must be between 1 and %D",size);
2770
2771 ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
2772
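/* With no subcommunicator supplied, the parent communicator is split into
   nsubcomm pieces. The split type is set to contiguous by default below;
   PetscSubcommSetFromOptions() may override it from the options database, and
   an interlaced split is dispatched to MatGetRedundantMatrix_MPIAIJ_interlaced()
   further down. */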
ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr); 2773 ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr); 2774 ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr); 2775 subcomm = psubcomm->comm; 2776 } else { /* retrieve psubcomm and subcomm */ 2777 ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr); 2778 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2779 redund = (*matredundant)->redundant; 2780 psubcomm = redund->psubcomm; 2781 } 2782 if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) { 2783 ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr); 2784 if (reuse == MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_Redundant() */ 2785 ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr); 2786 (*matredundant)->redundant->psubcomm = psubcomm; 2787 } 2788 PetscFunctionReturn(0); 2789 } 2790 } 2791 2792 /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */ 2793 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2794 if (reuse == MAT_INITIAL_MATRIX) { 2795 /* create a local sequential matrix matseq[0] */ 2796 mloc_sub = PETSC_DECIDE; 2797 ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr); 2798 ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr); 2799 rstart = rend - mloc_sub; 2800 ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr); 2801 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr); 2802 } else { /* reuse == MAT_REUSE_MATRIX */ 2803 redund = (*matredundant)->redundant; 2804 isrow = redund->isrow; 2805 iscol = redund->iscol; 2806 matseq = redund->matseq; 2807 } 2808 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr); 2809 ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr); 2810 2811 if (reuse == MAT_INITIAL_MATRIX) { 2812 /* create a supporting struct and attach it to C for reuse */ 2813 ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr); 2814 (*matredundant)->redundant = redund; 2815 redund->isrow = isrow; 2816 redund->iscol = iscol; 2817 redund->matseq = matseq; 2818 redund->psubcomm = psubcomm; 2819 } 2820 PetscFunctionReturn(0); 2821 } 2822 2823 #undef __FUNCT__ 2824 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2825 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2826 { 2827 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2828 PetscErrorCode ierr; 2829 PetscInt i,*idxb = 0; 2830 PetscScalar *va,*vb; 2831 Vec vtmp; 2832 2833 PetscFunctionBegin; 2834 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2835 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2836 if (idx) { 2837 for (i=0; i<A->rmap->n; i++) { 2838 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2839 } 2840 } 2841 2842 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2843 if (idx) { 2844 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2845 } 2846 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2847 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2848 2849 for (i=0; i<A->rmap->n; i++) { 2850 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2851 va[i] = vb[i]; 2852 if (idx) idx[i] = a->garray[idxb[i]]; 2853 } 2854 } 2855 2856 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2857 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2858 ierr = PetscFree(idxb);CHKERRQ(ierr); 2859 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2860 
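/* At this point v holds, for each local row, the entry of largest absolute
   value over both the diagonal (a->A) and off-diagonal (a->B) blocks, and
   idx[] (when provided) carries the matching global column indices. */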
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMin_MPIAIJ"
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*) A->data;
  PetscInt       n = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPIAIJ"
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*) A->data;
  PetscInt       n = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
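  /* Row maxima are computed separately for the diagonal block (mat->A) and the
     off-diagonal block (mat->B), then merged row by row below; garray translates
     the off-diagonal block's local column indices back to global columns. */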
ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2963 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2964 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2965 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2966 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2967 for (r = 0; r < n; ++r) { 2968 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2969 a[r] = diagA[r]; 2970 idx[r] = cstart + diagIdx[r]; 2971 } else { 2972 a[r] = offdiagA[r]; 2973 idx[r] = cmap[offdiagIdx[r]]; 2974 } 2975 } 2976 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2977 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2978 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2979 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2980 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2981 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2982 PetscFunctionReturn(0); 2983 } 2984 2985 #undef __FUNCT__ 2986 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2987 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2988 { 2989 PetscErrorCode ierr; 2990 Mat *dummy; 2991 2992 PetscFunctionBegin; 2993 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2994 *newmat = *dummy; 2995 ierr = PetscFree(dummy);CHKERRQ(ierr); 2996 PetscFunctionReturn(0); 2997 } 2998 2999 #undef __FUNCT__ 3000 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3001 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3002 { 3003 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3004 PetscErrorCode ierr; 3005 3006 PetscFunctionBegin; 3007 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3008 PetscFunctionReturn(0); 3009 } 3010 3011 #undef __FUNCT__ 3012 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3013 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3014 { 3015 PetscErrorCode ierr; 3016 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3017 3018 PetscFunctionBegin; 3019 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3020 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3021 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3022 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3023 PetscFunctionReturn(0); 3024 } 3025 3026 /* -------------------------------------------------------------------*/ 3027 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3028 MatGetRow_MPIAIJ, 3029 MatRestoreRow_MPIAIJ, 3030 MatMult_MPIAIJ, 3031 /* 4*/ MatMultAdd_MPIAIJ, 3032 MatMultTranspose_MPIAIJ, 3033 MatMultTransposeAdd_MPIAIJ, 3034 #if defined(PETSC_HAVE_PBGL) 3035 MatSolve_MPIAIJ, 3036 #else 3037 0, 3038 #endif 3039 0, 3040 0, 3041 /*10*/ 0, 3042 0, 3043 0, 3044 MatSOR_MPIAIJ, 3045 MatTranspose_MPIAIJ, 3046 /*15*/ MatGetInfo_MPIAIJ, 3047 MatEqual_MPIAIJ, 3048 MatGetDiagonal_MPIAIJ, 3049 MatDiagonalScale_MPIAIJ, 3050 MatNorm_MPIAIJ, 3051 /*20*/ MatAssemblyBegin_MPIAIJ, 3052 MatAssemblyEnd_MPIAIJ, 3053 MatSetOption_MPIAIJ, 3054 MatZeroEntries_MPIAIJ, 3055 /*24*/ MatZeroRows_MPIAIJ, 3056 0, 3057 #if defined(PETSC_HAVE_PBGL) 3058 0, 3059 #else 3060 0, 3061 #endif 3062 0, 3063 0, 3064 /*29*/ MatSetUp_MPIAIJ, 3065 #if defined(PETSC_HAVE_PBGL) 3066 0, 3067 #else 3068 0, 3069 #endif 3070 0, 3071 0, 3072 0, 3073 /*34*/ MatDuplicate_MPIAIJ, 3074 0, 3075 0, 3076 0, 3077 0, 3078 /*39*/ MatAXPY_MPIAIJ, 3079 MatGetSubMatrices_MPIAIJ, 3080 MatIncreaseOverlap_MPIAIJ, 3081 MatGetValues_MPIAIJ, 3082 MatCopy_MPIAIJ, 3083 /*44*/ MatGetRowMax_MPIAIJ, 3084 MatScale_MPIAIJ, 3085 0, 3086 MatDiagonalSet_MPIAIJ, 3087 
MatZeroRowsColumns_MPIAIJ, 3088 /*49*/ MatSetRandom_MPIAIJ, 3089 0, 3090 0, 3091 0, 3092 0, 3093 /*54*/ MatFDColoringCreate_MPIXAIJ, 3094 0, 3095 MatSetUnfactored_MPIAIJ, 3096 MatPermute_MPIAIJ, 3097 0, 3098 /*59*/ MatGetSubMatrix_MPIAIJ, 3099 MatDestroy_MPIAIJ, 3100 MatView_MPIAIJ, 3101 0, 3102 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3103 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3104 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3105 0, 3106 0, 3107 0, 3108 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3109 MatGetRowMinAbs_MPIAIJ, 3110 0, 3111 MatSetColoring_MPIAIJ, 3112 0, 3113 MatSetValuesAdifor_MPIAIJ, 3114 /*75*/ MatFDColoringApply_AIJ, 3115 0, 3116 0, 3117 0, 3118 MatFindZeroDiagonals_MPIAIJ, 3119 /*80*/ 0, 3120 0, 3121 0, 3122 /*83*/ MatLoad_MPIAIJ, 3123 0, 3124 0, 3125 0, 3126 0, 3127 0, 3128 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3129 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3130 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3131 MatPtAP_MPIAIJ_MPIAIJ, 3132 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3133 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3134 0, 3135 0, 3136 0, 3137 0, 3138 /*99*/ 0, 3139 0, 3140 0, 3141 MatConjugate_MPIAIJ, 3142 0, 3143 /*104*/MatSetValuesRow_MPIAIJ, 3144 MatRealPart_MPIAIJ, 3145 MatImaginaryPart_MPIAIJ, 3146 0, 3147 0, 3148 /*109*/0, 3149 MatGetRedundantMatrix_MPIAIJ, 3150 MatGetRowMin_MPIAIJ, 3151 0, 3152 0, 3153 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3154 0, 3155 0, 3156 0, 3157 0, 3158 /*119*/0, 3159 0, 3160 0, 3161 0, 3162 MatGetMultiProcBlock_MPIAIJ, 3163 /*124*/MatFindNonzeroRows_MPIAIJ, 3164 MatGetColumnNorms_MPIAIJ, 3165 MatInvertBlockDiagonal_MPIAIJ, 3166 0, 3167 MatGetSubMatricesParallel_MPIAIJ, 3168 /*129*/0, 3169 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3170 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3171 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3172 0, 3173 /*134*/0, 3174 0, 3175 0, 3176 0, 3177 0, 3178 /*139*/0, 3179 0, 3180 0, 3181 MatFDColoringSetUp_MPIXAIJ 3182 }; 3183 3184 /* ----------------------------------------------------------------------------------------*/ 3185 3186 #undef __FUNCT__ 3187 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3188 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3189 { 3190 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3191 PetscErrorCode ierr; 3192 3193 PetscFunctionBegin; 3194 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3195 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3196 PetscFunctionReturn(0); 3197 } 3198 3199 #undef __FUNCT__ 3200 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3201 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3202 { 3203 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3204 PetscErrorCode ierr; 3205 3206 PetscFunctionBegin; 3207 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3208 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3209 PetscFunctionReturn(0); 3210 } 3211 3212 #undef __FUNCT__ 3213 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3214 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3215 { 3216 Mat_MPIAIJ *b; 3217 PetscErrorCode ierr; 3218 3219 PetscFunctionBegin; 3220 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3221 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3222 b = (Mat_MPIAIJ*)B->data; 3223 3224 if (!B->preallocated) { 3225 /* Explicitly create 2 MATSEQAIJ matrices. 
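       b->A holds the "diagonal" block (local rows by local columns) and b->B the
       "off-diagonal" block. b->B is created with the full global column width;
       its columns are compacted to those actually used once the matrix is
       assembled.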
*/ 3226 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3227 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3228 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 3229 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3230 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 3231 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3232 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3233 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 3234 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3235 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 3236 } 3237 3238 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3239 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3240 B->preallocated = PETSC_TRUE; 3241 PetscFunctionReturn(0); 3242 } 3243 3244 #undef __FUNCT__ 3245 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3246 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3247 { 3248 Mat mat; 3249 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3250 PetscErrorCode ierr; 3251 3252 PetscFunctionBegin; 3253 *newmat = 0; 3254 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3255 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3256 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 3257 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3258 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3259 a = (Mat_MPIAIJ*)mat->data; 3260 3261 mat->factortype = matin->factortype; 3262 mat->assembled = PETSC_TRUE; 3263 mat->insertmode = NOT_SET_VALUES; 3264 mat->preallocated = PETSC_TRUE; 3265 3266 a->size = oldmat->size; 3267 a->rank = oldmat->rank; 3268 a->donotstash = oldmat->donotstash; 3269 a->roworiented = oldmat->roworiented; 3270 a->rowindices = 0; 3271 a->rowvalues = 0; 3272 a->getrowactive = PETSC_FALSE; 3273 3274 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3275 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3276 3277 if (oldmat->colmap) { 3278 #if defined(PETSC_USE_CTABLE) 3279 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3280 #else 3281 ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr); 3282 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3283 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3284 #endif 3285 } else a->colmap = 0; 3286 if (oldmat->garray) { 3287 PetscInt len; 3288 len = oldmat->B->cmap->n; 3289 ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr); 3290 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3291 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3292 } else a->garray = 0; 3293 3294 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3295 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3296 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3297 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3298 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3299 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3300 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3301 ierr = 
PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscScalar    *vals,*svals;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
  PetscInt       cend,cstart,n,*rowners,sizesset=1;
  int            fd;
  PetscInt       bs = 1;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
  }

  ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M    = header[1]; N = header[2];
  /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
  if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
  if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;

  /* If global sizes are set, check that they are consistent with those given in the file */
  if (sizesset) {
    ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
  }
  if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
  if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);

  /* determine ownership of all (block) rows */
  if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%D) and block size (%D)",M,bs);
  if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */
  else m = newMat->rmap->n; /* set by user */

  ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* First process needs enough room for the process with the most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<=size; i++) {
      mmax = PetscMax(mmax, rowners[i]);
    }
  } else mmax = -1; /* unused, but compilers complain */

  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
    ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += ourlens[j];
    }
    for (i=1; i<size; i++) {
      ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<rowners[i+1]-rowners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
    else n = newMat->cmap->n;
    ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  if (!sizesset) {
    ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
  }

  if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}

  ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);

  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscInt       csize;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    PetscInt cbs;
    ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
    ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
  }
  ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ copy on
  each process, then the final parallel result obtained by concatenating the local
  pieces. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().

    Note: This requires a sequential iscol with all indices.
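    The gathered sequential piece (Mreuse) is composed on the result as "SubMatrix"
  so that MAT_REUSE_MATRIX calls can locate and refill it instead of rebuilding it.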
3555 */ 3556 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3557 { 3558 PetscErrorCode ierr; 3559 PetscMPIInt rank,size; 3560 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3561 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3562 PetscBool allcolumns, colflag; 3563 Mat M,Mreuse; 3564 MatScalar *vwork,*aa; 3565 MPI_Comm comm; 3566 Mat_SeqAIJ *aij; 3567 3568 PetscFunctionBegin; 3569 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3570 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3571 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3572 3573 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3574 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3575 if (colflag && ncol == mat->cmap->N) { 3576 allcolumns = PETSC_TRUE; 3577 } else { 3578 allcolumns = PETSC_FALSE; 3579 } 3580 if (call == MAT_REUSE_MATRIX) { 3581 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3582 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3583 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3584 } else { 3585 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3586 } 3587 3588 /* 3589 m - number of local rows 3590 n - number of columns (same on all processors) 3591 rstart - first row in new global matrix generated 3592 */ 3593 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3594 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3595 if (call == MAT_INITIAL_MATRIX) { 3596 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3597 ii = aij->i; 3598 jj = aij->j; 3599 3600 /* 3601 Determine the number of non-zeros in the diagonal and off-diagonal 3602 portions of the matrix in order to do correct preallocation 3603 */ 3604 3605 /* first get start and end of "diagonal" columns */ 3606 if (csize == PETSC_DECIDE) { 3607 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3608 if (mglobal == n) { /* square matrix */ 3609 nlocal = m; 3610 } else { 3611 nlocal = n/size + ((n % size) > rank); 3612 } 3613 } else { 3614 nlocal = csize; 3615 } 3616 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3617 rstart = rend - nlocal; 3618 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3619 3620 /* next, compute all the lengths */ 3621 ierr = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr); 3622 olens = dlens + m; 3623 for (i=0; i<m; i++) { 3624 jend = ii[i+1] - ii[i]; 3625 olen = 0; 3626 dlen = 0; 3627 for (j=0; j<jend; j++) { 3628 if (*jj < rstart || *jj >= rend) olen++; 3629 else dlen++; 3630 jj++; 3631 } 3632 olens[i] = olen; 3633 dlens[i] = dlen; 3634 } 3635 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3636 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3637 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3638 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3639 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3640 ierr = PetscFree(dlens);CHKERRQ(ierr); 3641 } else { 3642 PetscInt ml,nl; 3643 3644 M = *newmat; 3645 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3646 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3647 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3648 /* 3649 The next 
two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart,cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);

  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);

#if defined(PETSC_USE_DEBUG)
  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    JJ  = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
    if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
  }
#endif

  for (i=0; i<m; i++) {
    nnz     = Ii[i+1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    d       = 0;
    for (j=0; j<nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  if (v) values = (PetscScalar*)v;
  else {
    ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = Ii[i+1] - Ii[i];
    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (!v) {
    ierr = PetscFree(values);CHKERRQ(ierr);
  }
  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
/*@
   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero)
-  v - optional values in the matrix

   Level: developer

   Notes:
       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of v[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

       The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering; i.e., for the following matrix, the input data expected is
    as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
          MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           For matrices that will be factored, you must leave room for (and set)
           the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)), is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.
   See Users-Manual: ch_mat for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to. This is an m x n matrix. In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (m x N) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs, nz_allocated, nz_used, nz_unneeded;
   you can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
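
   As a sketch (the per-row counts are the d_nnz/o_nnz values worked out below),
   proc1 could preallocate its three local rows with
.vb
     PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
     MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve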
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2; i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithArrays"
/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
   in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
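
       As a sketch, using the local CSR arrays i, j, v from the example below, process 0
     (which owns 2 of the 3 global rows) could create the parallel matrix with
.vb
     Mat A;
     MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,v,&A);
.ve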
       The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering; i.e., for the following matrix, the input data expected is
    as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateAIJ"
/*@C
   MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are the input parameter 'm'; i.e., each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor; i.e., the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism:
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!


   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows.
 This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2; i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.
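
   A minimal creation sketch for the example above, as seen from proc0 (which
   owns 3 rows; the counts are the d_nnz/o_nnz values listed above):
.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve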
4183 4184 Level: intermediate 4185 4186 .keywords: matrix, aij, compressed row, sparse, parallel 4187 4188 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4189 MPIAIJ, MatCreateMPIAIJWithArrays() 4190 @*/ 4191 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4192 { 4193 PetscErrorCode ierr; 4194 PetscMPIInt size; 4195 4196 PetscFunctionBegin; 4197 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4198 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4199 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4200 if (size > 1) { 4201 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4202 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4203 } else { 4204 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4205 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4206 } 4207 PetscFunctionReturn(0); 4208 } 4209 4210 #undef __FUNCT__ 4211 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4212 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4213 { 4214 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4215 4216 PetscFunctionBegin; 4217 if (Ad) *Ad = a->A; 4218 if (Ao) *Ao = a->B; 4219 if (colmap) *colmap = a->garray; 4220 PetscFunctionReturn(0); 4221 } 4222 4223 #undef __FUNCT__ 4224 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4225 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4226 { 4227 PetscErrorCode ierr; 4228 PetscInt i; 4229 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4230 4231 PetscFunctionBegin; 4232 if (coloring->ctype == IS_COLORING_GLOBAL) { 4233 ISColoringValue *allcolors,*colors; 4234 ISColoring ocoloring; 4235 4236 /* set coloring for diagonal portion */ 4237 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4238 4239 /* set coloring for off-diagonal portion */ 4240 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4241 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4242 for (i=0; i<a->B->cmap->n; i++) { 4243 colors[i] = allcolors[a->garray[i]]; 4244 } 4245 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4246 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4247 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4248 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4249 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4250 ISColoringValue *colors; 4251 PetscInt *larray; 4252 ISColoring ocoloring; 4253 4254 /* set coloring for diagonal portion */ 4255 ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr); 4256 for (i=0; i<a->A->cmap->n; i++) { 4257 larray[i] = i + A->cmap->rstart; 4258 } 4259 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4260 ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr); 4261 for (i=0; i<a->A->cmap->n; i++) { 4262 colors[i] = coloring->colors[larray[i]]; 4263 } 4264 ierr = PetscFree(larray);CHKERRQ(ierr); 4265 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4266 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4267 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4268 4269 /* set coloring for off-diagonal portion */ 4270 ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr); 4271 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4272 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4273 for (i=0; i<a->B->cmap->n; i++) { 4274 colors[i] = coloring->colors[larray[i]]; 4275 } 4276 ierr = PetscFree(larray);CHKERRQ(ierr); 4277 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4278 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4279 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4280 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4281 PetscFunctionReturn(0); 4282 } 4283 4284 #undef __FUNCT__ 4285 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4286 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4287 { 4288 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4289 PetscErrorCode ierr; 4290 4291 PetscFunctionBegin; 4292 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4293 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4294 PetscFunctionReturn(0); 4295 } 4296 4297 #undef __FUNCT__ 4298 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4299 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4300 { 4301 PetscErrorCode ierr; 4302 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4303 PetscInt *indx; 4304 4305 PetscFunctionBegin; 4306 /* This routine will ONLY return MPIAIJ type matrix */ 4307 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4308 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4309 if (n == PETSC_DECIDE) { 4310 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4311 } 4312 /* Check sum(n) = N */ 4313 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4314 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4315 4316 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4317 rstart -= m; 4318 4319 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4320 for (i=0; i<m; i++) { 4321 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4322 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4323 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4324 } 4325 4326 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4327 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4328 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4329 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4330 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4331 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4332 PetscFunctionReturn(0); 4333 } 4334 4335 #undef __FUNCT__ 4336 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4337 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4338 { 4339 PetscErrorCode ierr; 4340 PetscInt m,N,i,rstart,nnz,Ii; 4341 PetscInt *indx; 4342 PetscScalar *values; 4343 4344 PetscFunctionBegin; 4345 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4346 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4347 for (i=0; i<m; i++) { 4348 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4349 Ii = i + rstart; 4350 ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4351 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4352 } 4353 ierr = 
MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
/*@
      MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating the sequential
                 matrices from each processor

   Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    inmat - the input sequential matrix (one per process)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    outmat - the parallel matrix generated

   Level: advanced

   Notes: The number of columns of the matrix on EACH processor MUST be the same.

@*/
PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
  if (size == 1) {
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr);
    } else {
      ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
  } else {
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
    }
    ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFileSplit"
PetscErrorCode MatFileSplit(Mat A,char *outfile)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscInt          m,N,i,rstart,nnz;
  size_t            len;
  const PetscInt    *indx;
  PetscViewer       out;
  char              *name;
  Mat               B;
  const PetscScalar *values;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
  ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
  /* Should this be the type of the diagonal block of A?
*/ 4422 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4423 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4424 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4425 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4426 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4427 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4428 for (i=0; i<m; i++) { 4429 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4430 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4431 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4432 } 4433 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4434 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4435 4436 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4437 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4438 ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr); 4439 sprintf(name,"%s.%d",outfile,rank); 4440 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4441 ierr = PetscFree(name);CHKERRQ(ierr); 4442 ierr = MatView(B,out);CHKERRQ(ierr); 4443 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4444 ierr = MatDestroy(&B);CHKERRQ(ierr); 4445 PetscFunctionReturn(0); 4446 } 4447 4448 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4449 #undef __FUNCT__ 4450 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4451 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4452 { 4453 PetscErrorCode ierr; 4454 Mat_Merge_SeqsToMPI *merge; 4455 PetscContainer container; 4456 4457 PetscFunctionBegin; 4458 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4459 if (container) { 4460 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4461 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4462 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4463 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4464 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4465 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4466 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4467 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4468 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4469 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4470 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4471 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4472 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4473 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4474 ierr = PetscFree(merge);CHKERRQ(ierr); 4475 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4476 } 4477 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4478 PetscFunctionReturn(0); 4479 } 4480 4481 #include <../src/mat/utils/freespace.h> 4482 #include <petscbt.h> 4483 4484 #undef __FUNCT__ 4485 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4486 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4487 { 4488 PetscErrorCode ierr; 4489 MPI_Comm comm; 4490 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4491 PetscMPIInt size,rank,taga,*len_s; 4492 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4493 PetscInt proc,m; 4494 PetscInt **buf_ri,**buf_rj; 4495 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4496 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4497 MPI_Request *s_waits,*r_waits; 4498 MPI_Status *status; 4499 MatScalar *aa=a->a; 4500 MatScalar **abuf_r,*ba_i; 4501 Mat_Merge_SeqsToMPI *merge; 4502 PetscContainer container; 4503 4504 PetscFunctionBegin; 4505 ierr = 
PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;             /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr =
PetscFree(ba_i);CHKERRQ(ierr); 4598 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4599 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4600 PetscFunctionReturn(0); 4601 } 4602 4603 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4604 4605 #undef __FUNCT__ 4606 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4607 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4608 { 4609 PetscErrorCode ierr; 4610 Mat B_mpi; 4611 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4612 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4613 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4614 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4615 PetscInt len,proc,*dnz,*onz,bs,cbs; 4616 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4617 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4618 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4619 MPI_Status *status; 4620 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4621 PetscBT lnkbt; 4622 Mat_Merge_SeqsToMPI *merge; 4623 PetscContainer container; 4624 4625 PetscFunctionBegin; 4626 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4627 4628 /* make sure it is a PETSc comm */ 4629 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4630 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4631 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4632 4633 ierr = PetscNew(&merge);CHKERRQ(ierr); 4634 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4635 4636 /* determine row ownership */ 4637 /*---------------------------------------------------------*/ 4638 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4639 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4640 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4641 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4642 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4643 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4644 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4645 4646 m = merge->rowmap->n; 4647 owners = merge->rowmap->range; 4648 4649 /* determine the number of messages to send, their lengths */ 4650 /*---------------------------------------------------------*/ 4651 len_s = merge->len_s; 4652 4653 len = 0; /* length of buf_si[] */ 4654 merge->nsend = 0; 4655 for (proc=0; proc<size; proc++) { 4656 len_si[proc] = 0; 4657 if (proc == rank) { 4658 len_s[proc] = 0; 4659 } else { 4660 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4661 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4662 } 4663 if (len_s[proc]) { 4664 merge->nsend++; 4665 nrows = 0; 4666 for (i=owners[proc]; i<owners[proc+1]; i++) { 4667 if (ai[i+1] > ai[i]) nrows++; 4668 } 4669 len_si[proc] = 2*(nrows+1); 4670 len += len_si[proc]; 4671 } 4672 } 4673 4674 /* determine the number and length of messages to receive for ij-structure */ 4675 /*-------------------------------------------------------------------------*/ 4676 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4677 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4678 4679 /* post the Irecv of j-structure */ 4680 /*-------------------------------*/ 4681 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4682 ierr = 
PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4683 4684 /* post the Isend of j-structure */ 4685 /*--------------------------------*/ 4686 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4687 4688 for (proc=0, k=0; proc<size; proc++) { 4689 if (!len_s[proc]) continue; 4690 i = owners[proc]; 4691 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4692 k++; 4693 } 4694 4695 /* receives and sends of j-structure are complete */ 4696 /*------------------------------------------------*/ 4697 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4698 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4699 4700 /* send and recv i-structure */ 4701 /*---------------------------*/ 4702 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4703 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4704 4705 ierr = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr); 4706 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4707 for (proc=0,k=0; proc<size; proc++) { 4708 if (!len_s[proc]) continue; 4709 /* form outgoing message for i-structure: 4710 buf_si[0]: nrows to be sent 4711 [1:nrows]: row index (global) 4712 [nrows+1:2*nrows+1]: i-structure index 4713 */ 4714 /*-------------------------------------------*/ 4715 nrows = len_si[proc]/2 - 1; 4716 buf_si_i = buf_si + nrows+1; 4717 buf_si[0] = nrows; 4718 buf_si_i[0] = 0; 4719 nrows = 0; 4720 for (i=owners[proc]; i<owners[proc+1]; i++) { 4721 anzi = ai[i+1] - ai[i]; 4722 if (anzi) { 4723 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4724 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4725 nrows++; 4726 } 4727 } 4728 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4729 k++; 4730 buf_si += len_si[proc]; 4731 } 4732 4733 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4734 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4735 4736 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4737 for (i=0; i<merge->nrecv; i++) { 4738 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4739 } 4740 4741 ierr = PetscFree(len_si);CHKERRQ(ierr); 4742 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4743 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4744 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4745 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4746 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4747 ierr = PetscFree(status);CHKERRQ(ierr); 4748 4749 /* compute a local seq matrix in each processor */ 4750 /*----------------------------------------------*/ 4751 /* allocate bi array and free space for accumulating nonzero column info */ 4752 ierr = PetscMalloc1((m+1),&bi);CHKERRQ(ierr); 4753 bi[0] = 0; 4754 4755 /* create and initialize a linked list */ 4756 nlnk = N+1; 4757 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4758 4759 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4760 len = ai[owners[rank+1]] - ai[owners[rank]]; 4761 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4762 4763 current_space = free_space; 4764 4765 /* determine symbolic info for each local row */ 4766 ierr = 
PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
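  /* the destroy override above is what frees the Mat_Merge_SeqsToMPI scratch data
     attached below; see MatDestroy_MPIAIJ_SeqsToMPI() earlier in this file */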
  merge->bi        = bi;
  merge->bj        = bj;
  merge->buf_ri    = buf_ri;
  merge->buf_rj    = buf_rj;
  merge->coi       = NULL;
  merge->coj       = NULL;
  merge->owners_co = NULL;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);

  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
/*@C
      MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding together sequential
                 matrices from each processor

   Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix (one per process)
.    m - number of local rows (or PETSC_DECIDE)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix on each processor MUST be the same.
     The input seqmat is attached to the container "Mat_Merge_SeqsToMPI" and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
    } else {
      ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
    ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
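/*
   Illustrative usage sketch (not compiled here; the matrix names and sizes are
   hypothetical). Each process contributes an identically sized SeqAIJ matrix,
   and the entries are summed into one parallel matrix; the MAT_REUSE_MATRIX
   call assumes the nonzero pattern of seqmat has not changed:

     Mat seqmat,mpimat;
     ... assemble the M x N SeqAIJ matrix seqmat on every process ...
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
     ... change the values (but not the pattern) of seqmat ...
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
     ierr = MatDestroy(&mpimat);CHKERRQ(ierr);
*/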
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMat"
/*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize()

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()

@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a,*b;
  PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
  MatScalar      *aa,*ba,*cam;
  PetscScalar    *ca;
  PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool      match;
  MPI_Comm       comm;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);

  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  a  = (Mat_SeqAIJ*)(mpimat->A)->data;
  b  = (Mat_SeqAIJ*)(mpimat->B)->data;
  ai = a->i; aj = a->j; bi = b->i; bj = b->j;
  aa = a->a; ba = b->a;
  if (scall == MAT_INITIAL_MATRIX) {
    if (size == 1) {
      ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }

    ierr  = PetscMalloc1((1+am),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++) {
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr);
    k    = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    ci  = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
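/*
   Illustrative usage sketch (hypothetical caller code): extract the locally
   owned rows of a parallel matrix A into a sequential matrix, then refresh
   the copy after A's numerical values (but not its pattern) change:

     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ... modify the entries of A, keeping its nonzero pattern ...
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/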
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
/*@C
     MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or NULL)

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()

@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row) {
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr   = PetscFree(aloc);CHKERRQ(ierr);
  if (!row) {
    ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
  }
  if (!col) {
    ierr = ISDestroy(&iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
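/*
   Illustrative usage sketch (hypothetical caller code): passing NULL for the
   row and column index sets selects all local rows and the nonzero columns,
   as described in the notes above:

     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
     ... use A_loc ...
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/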
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or NULL)

   Output Parameter:
+    rowb, colb - index sets of rows and columns of B to extract
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq=NULL;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb  = *rowb; iscolb = *colb;
    ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr   = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb) {
    ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
    of the OFF-DIAGONAL portion of the local part of A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5185 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5186 5187 Level: developer 5188 5189 */ 5190 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5191 { 5192 VecScatter_MPI_General *gen_to,*gen_from; 5193 PetscErrorCode ierr; 5194 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5195 Mat_SeqAIJ *b_oth; 5196 VecScatter ctx =a->Mvctx; 5197 MPI_Comm comm; 5198 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5199 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5200 PetscScalar *rvalues,*svalues; 5201 MatScalar *b_otha,*bufa,*bufA; 5202 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5203 MPI_Request *rwaits = NULL,*swaits = NULL; 5204 MPI_Status *sstatus,rstatus; 5205 PetscMPIInt jj,size; 5206 PetscInt *cols,sbs,rbs; 5207 PetscScalar *vals; 5208 5209 PetscFunctionBegin; 5210 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5211 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5212 if (size == 1) PetscFunctionReturn(0); 5213 5214 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5215 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5216 } 5217 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5218 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5219 5220 gen_to = (VecScatter_MPI_General*)ctx->todata; 5221 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5222 rvalues = gen_from->values; /* holds the length of receiving row */ 5223 svalues = gen_to->values; /* holds the length of sending row */ 5224 nrecvs = gen_from->n; 5225 nsends = gen_to->n; 5226 5227 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5228 srow = gen_to->indices; /* local row index to be sent */ 5229 sstarts = gen_to->starts; 5230 sprocs = gen_to->procs; 5231 sstatus = gen_to->sstatus; 5232 sbs = gen_to->bs; 5233 rstarts = gen_from->starts; 5234 rprocs = gen_from->procs; 5235 rbs = gen_from->bs; 5236 5237 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5238 if (scall == MAT_INITIAL_MATRIX) { 5239 /* i-array */ 5240 /*---------*/ 5241 /* post receives */ 5242 for (i=0; i<nrecvs; i++) { 5243 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5244 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5245 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5246 } 5247 5248 /* pack the outgoing message */ 5249 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5250 5251 sstartsj[0] = 0; 5252 rstartsj[0] = 0; 5253 len = 0; /* total length of j or a array to be sent */ 5254 k = 0; 5255 for (i=0; i<nsends; i++) { 5256 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5257 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5258 for (j=0; j<nrows; j++) { 5259 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5260 for (l=0; l<sbs; l++) { 5261 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5262 5263 rowlen[j*sbs+l] = ncols; 5264 5265 len += ncols; 5266 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5267 } 5268 k++; 5269 } 5270 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5271 5272 
      sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);

    b_othi[0] = 0;
    len       = 0; /* total length of j or a array to be received */
    k         = 0;
    for (i=0; i<nrecvs; i++) {
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++) {
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++) {
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++) {
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++) {
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX) {
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++) {
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++) {
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++) {
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++) {
          *bufA++ = vals[l];
        }
        ierr =
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5366 } 5367 } 5368 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5369 } 5370 /* recvs and sends of a-array are completed */ 5371 i = nrecvs; 5372 while (i--) { 5373 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5374 } 5375 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5376 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5377 5378 if (scall == MAT_INITIAL_MATRIX) { 5379 /* put together the new matrix */ 5380 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5381 5382 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5383 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5384 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5385 b_oth->free_a = PETSC_TRUE; 5386 b_oth->free_ij = PETSC_TRUE; 5387 b_oth->nonew = 0; 5388 5389 ierr = PetscFree(bufj);CHKERRQ(ierr); 5390 if (!startsj_s || !bufa_ptr) { 5391 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5392 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5393 } else { 5394 *startsj_s = sstartsj; 5395 *startsj_r = rstartsj; 5396 *bufa_ptr = bufa; 5397 } 5398 } 5399 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5400 PetscFunctionReturn(0); 5401 } 5402 5403 #undef __FUNCT__ 5404 #define __FUNCT__ "MatGetCommunicationStructs" 5405 /*@C 5406 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5407 5408 Not Collective 5409 5410 Input Parameters: 5411 . A - The matrix in mpiaij format 5412 5413 Output Parameter: 5414 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5415 . 
colmap - A map from global column index to local index into lvec 5416 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5417 5418 Level: developer 5419 5420 @*/ 5421 #if defined(PETSC_USE_CTABLE) 5422 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5423 #else 5424 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5425 #endif 5426 { 5427 Mat_MPIAIJ *a; 5428 5429 PetscFunctionBegin; 5430 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5431 PetscValidPointer(lvec, 2); 5432 PetscValidPointer(colmap, 3); 5433 PetscValidPointer(multScatter, 4); 5434 a = (Mat_MPIAIJ*) A->data; 5435 if (lvec) *lvec = a->lvec; 5436 if (colmap) *colmap = a->colmap; 5437 if (multScatter) *multScatter = a->Mvctx; 5438 PetscFunctionReturn(0); 5439 } 5440 5441 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5442 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5443 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5444 #if defined(PETSC_HAVE_ELEMENTAL) 5445 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5446 #endif 5447 5448 #undef __FUNCT__ 5449 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5450 /* 5451 Computes (B'*A')' since computing B*A directly is untenable 5452 5453 n p p 5454 ( ) ( ) ( ) 5455 m ( A ) * n ( B ) = m ( C ) 5456 ( ) ( ) ( ) 5457 5458 */ 5459 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5460 { 5461 PetscErrorCode ierr; 5462 Mat At,Bt,Ct; 5463 5464 PetscFunctionBegin; 5465 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5466 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5467 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5468 ierr = MatDestroy(&At);CHKERRQ(ierr); 5469 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5470 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5471 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5472 PetscFunctionReturn(0); 5473 } 5474 5475 #undef __FUNCT__ 5476 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5477 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5478 { 5479 PetscErrorCode ierr; 5480 PetscInt m=A->rmap->n,n=B->cmap->n; 5481 Mat Cmat; 5482 5483 PetscFunctionBegin; 5484 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5485 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5486 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5487 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5488 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5489 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5490 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5491 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5492 5493 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5494 5495 *C = Cmat; 5496 PetscFunctionReturn(0); 5497 } 5498 5499 /* ----------------------------------------------------------------*/ 5500 #undef __FUNCT__ 5501 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5502 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5503 { 5504 PetscErrorCode ierr; 5505 5506 PetscFunctionBegin; 5507 if (scall == MAT_INITIAL_MATRIX) { 5508 ierr = 
PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5509 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5510 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5511 } 5512 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5513 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5514 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5515 PetscFunctionReturn(0); 5516 } 5517 5518 #if defined(PETSC_HAVE_MUMPS) 5519 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5520 #endif 5521 #if defined(PETSC_HAVE_PASTIX) 5522 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5523 #endif 5524 #if defined(PETSC_HAVE_SUPERLU_DIST) 5525 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5526 #endif 5527 #if defined(PETSC_HAVE_CLIQUE) 5528 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5529 #endif 5530 5531 /*MC 5532 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5533 5534 Options Database Keys: 5535 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5536 5537 Level: beginner 5538 5539 .seealso: MatCreateAIJ() 5540 M*/ 5541 5542 #undef __FUNCT__ 5543 #define __FUNCT__ "MatCreate_MPIAIJ" 5544 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5545 { 5546 Mat_MPIAIJ *b; 5547 PetscErrorCode ierr; 5548 PetscMPIInt size; 5549 5550 PetscFunctionBegin; 5551 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5552 5553 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5554 B->data = (void*)b; 5555 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5556 B->assembled = PETSC_FALSE; 5557 B->insertmode = NOT_SET_VALUES; 5558 b->size = size; 5559 5560 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5561 5562 /* build cache for off array entries formed */ 5563 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5564 5565 b->donotstash = PETSC_FALSE; 5566 b->colmap = 0; 5567 b->garray = 0; 5568 b->roworiented = PETSC_TRUE; 5569 5570 /* stuff used for matrix vector multiply */ 5571 b->lvec = NULL; 5572 b->Mvctx = NULL; 5573 5574 /* stuff for MatGetRow() */ 5575 b->rowindices = 0; 5576 b->rowvalues = 0; 5577 b->getrowactive = PETSC_FALSE; 5578 5579 /* flexible pointer used in CUSP/CUSPARSE classes */ 5580 b->spptr = NULL; 5581 5582 #if defined(PETSC_HAVE_MUMPS) 5583 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5584 #endif 5585 #if defined(PETSC_HAVE_PASTIX) 5586 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5587 #endif 5588 #if defined(PETSC_HAVE_SUPERLU_DIST) 5589 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5590 #endif 5591 #if defined(PETSC_HAVE_CLIQUE) 5592 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr); 5593 #endif 5594 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5595 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5596 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@
     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       it calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices for "diagonal" portion of matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.

       The i and j indices are 0 based

       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
       legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
       not easily support in-place reassembly.
It is recommended to use MatSetValues() (or a variant thereof) because 5655 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5656 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5657 communication if it is known that only local entries will be set. 5658 5659 .keywords: matrix, aij, compressed row, sparse, parallel 5660 5661 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5662 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5663 @*/ 5664 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5665 { 5666 PetscErrorCode ierr; 5667 Mat_MPIAIJ *maij; 5668 5669 PetscFunctionBegin; 5670 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5671 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5672 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5673 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5674 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5675 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5676 maij = (Mat_MPIAIJ*) (*mat)->data; 5677 5678 (*mat)->preallocated = PETSC_TRUE; 5679 5680 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5681 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5682 5683 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5684 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5685 5686 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5687 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5688 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5689 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5690 5691 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5692 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5693 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5694 PetscFunctionReturn(0); 5695 } 5696 5697 /* 5698 Special version for direct calls from Fortran 5699 */ 5700 #include <petsc-private/fortranimpl.h> 5701 5702 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5703 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5704 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5705 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5706 #endif 5707 5708 /* Change these macros so can be used in void function */ 5709 #undef CHKERRQ 5710 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5711 #undef SETERRQ2 5712 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5713 #undef SETERRQ3 5714 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5715 #undef SETERRQ 5716 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5717 5718 #undef __FUNCT__ 5719 #define __FUNCT__ "matsetvaluesmpiaij_" 5720 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5721 { 5722 Mat mat = *mmat; 5723 PetscInt m = *mm, n = *mn; 5724 InsertMode addv = *maddv; 5725 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5726 PetscScalar value; 5727 PetscErrorCode ierr; 
5728 5729 MatCheckPreallocated(mat,1); 5730 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5731 5732 #if defined(PETSC_USE_DEBUG) 5733 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5734 #endif 5735 { 5736 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5737 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5738 PetscBool roworiented = aij->roworiented; 5739 5740 /* Some Variables required in the macro */ 5741 Mat A = aij->A; 5742 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5743 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5744 MatScalar *aa = a->a; 5745 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5746 Mat B = aij->B; 5747 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5748 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5749 MatScalar *ba = b->a; 5750 5751 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5752 PetscInt nonew = a->nonew; 5753 MatScalar *ap1,*ap2; 5754 5755 PetscFunctionBegin; 5756 for (i=0; i<m; i++) { 5757 if (im[i] < 0) continue; 5758 #if defined(PETSC_USE_DEBUG) 5759 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5760 #endif 5761 if (im[i] >= rstart && im[i] < rend) { 5762 row = im[i] - rstart; 5763 lastcol1 = -1; 5764 rp1 = aj + ai[row]; 5765 ap1 = aa + ai[row]; 5766 rmax1 = aimax[row]; 5767 nrow1 = ailen[row]; 5768 low1 = 0; 5769 high1 = nrow1; 5770 lastcol2 = -1; 5771 rp2 = bj + bi[row]; 5772 ap2 = ba + bi[row]; 5773 rmax2 = bimax[row]; 5774 nrow2 = bilen[row]; 5775 low2 = 0; 5776 high2 = nrow2; 5777 5778 for (j=0; j<n; j++) { 5779 if (roworiented) value = v[i*n+j]; 5780 else value = v[i+j*m]; 5781 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5782 if (in[j] >= cstart && in[j] < cend) { 5783 col = in[j] - cstart; 5784 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5785 } else if (in[j] < 0) continue; 5786 #if defined(PETSC_USE_DEBUG) 5787 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5788 #endif 5789 else { 5790 if (mat->was_assembled) { 5791 if (!aij->colmap) { 5792 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5793 } 5794 #if defined(PETSC_USE_CTABLE) 5795 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5796 col--; 5797 #else 5798 col = aij->colmap[in[j]] - 1; 5799 #endif 5800 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5801 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5802 col = in[j]; 5803 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5804 B = aij->B; 5805 b = (Mat_SeqAIJ*)B->data; 5806 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5807 rp2 = bj + bi[row]; 5808 ap2 = ba + bi[row]; 5809 rmax2 = bimax[row]; 5810 nrow2 = bilen[row]; 5811 low2 = 0; 5812 high2 = nrow2; 5813 bm = aij->B->rmap->n; 5814 ba = b->a; 5815 } 5816 } else col = in[j]; 5817 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5818 } 5819 } 5820 } else if (!aij->donotstash) { 5821 if (roworiented) { 5822 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5823 } else { 5824 ierr = 
MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5825 } 5826 } 5827 } 5828 } 5829 PetscFunctionReturnVoid(); 5830 } 5831 5832
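/*
   Illustrative sketch (hypothetical driver code, not part of this file): the
   assembly style recommended in the MatCreateMPIAIJWithSplitArrays() notes
   above -- create the matrix with MatCreateAIJ(), then insert the locally
   owned rows with MatSetValues(). The preallocation figures, the row loop
   bounds, and the per-row data (ncols, cols[], vals[]) are placeholders:

     Mat A;
     ierr = MatCreateAIJ(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,5,NULL,2,NULL,&A);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); (only if no off-process entries are set)
     for (row=rstart; row<rend; row++) {
       ... fill ncols, cols[], and vals[] for this locally owned row ...
       ierr = MatSetValues(A,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/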