#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc-private/vecimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also
   automatically switches over to using I-node routines when enough I-nodes exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
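/*
   Example usage (a minimal sketch; the sizes and per-row nonzero counts are
   illustrative assumptions, not requirements).  Calling both preallocation
   routines, as recommended above, keeps the code independent of the
   communicator size:

      Mat A;
      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);
      MatSetType(A,MATAIJ);
      MatSeqAIJSetPreallocation(A,5,NULL);           ->  honored on one process
      MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);    ->  honored on many processes
*/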
#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalSet_MPIAIJ"
PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*) Y->data;

  PetscFunctionBegin;
  if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
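/*
   Example (a sketch): compute the 2-norm of every column of an assembled MPIAIJ
   matrix; the result array must hold the global number of columns and, because of
   the MPI_Allreduce() above, ends up identical on every process:

      PetscInt  N;
      PetscReal *norms;
      MatGetSize(A,NULL,&N);
      PetscMalloc1(N,&norms);
      MatGetColumnNorms(A,NORM_2,norms);
      PetscFree(norms);
*/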
#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else { /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
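/*
   Example (a sketch, assuming gmat is a sequential AIJ matrix held on rank 0 of comm
   and m is the number of rows each calling process is to own):

      Mat local;
      MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&local);
      ...
      MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&local);    refresh values only
*/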
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each process
  has an order N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
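/*
   Looking up a global column gcol in the colmap (a sketch mirroring the lookups in
   MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below).  Entries are stored
   shifted by one so that a lookup result of zero, i.e. lcol = -1 after the shift,
   means "no such column in the off-diagonal part":

      PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
      PetscTableFind(aij->colmap,gcol+1,&lcol);
      lcol--;
   #else
      lcol = aij->colmap[gcol] - 1;
   #endif
*/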
/* Inserts or adds value at (row,col) of the diagonal block A: a short binary search
   narrows the row, a linear scan finds the column, and the row is reallocated and
   shifted if a new nonzero must be created (and is allowed). */
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else                 high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++;\
    a_noinsert: ; \
    ailen[row] = nrow1; \
  }

/* Same insertion logic as above, but for the off-diagonal block B. */
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else                 high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
  }

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
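/*
   Example (a sketch): the standard assembly sequence that drives the two routines
   above.  Entries destined for rows owned by other processes are stashed by
   MatSetValues() and moved to their owners during assembly:

      MatSetValues(mat,1,&row,1,&col,&value,ADD_VALUES);   row may be off-process
      MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
*/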
#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ     *mat    = (Mat_MPIAIJ *) A->data;
  PetscInt       *owners = A->rmap->range;
  PetscInt       n       = A->rmap->n;
  PetscSF        sf;
  PetscInt       *lrows;
  PetscSFNode    *rrows;
  PetscInt       r, p = 0, len = 0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    if (A->nooffproczerorows) {
      if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
      lrows[len++] = idx - owners[p];
    } else {
      rrows[r].rank  = p;
      rrows[r].index = rows[r] - owners[p];
    }
  }
  if (!A->nooffproczerorows) {
    ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
    /* Collect flags for rows to be zeroed */
    ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
    /* Compress and put in row numbers */
    for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  }
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }
  /* Must zero mat->B before mat->A because the (diag) case below may put values into mat->B */
  ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
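/*
   Example (a sketch): zero two global rows of an assembled matrix, put 1.0 on their
   diagonal entries, and adjust the right-hand side so the solution keeps the values
   already stored in x at those rows:

      PetscInt rows[] = {0, 5};
      MatZeroRows(A,2,rows,1.0,x,b);
*/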
#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off-diagonal part of matrix */
  ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off-diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  /* overlap the communication of ghost values of xx with the multiply by the diagonal block */
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
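/*
   Example (a sketch): extract the diagonal of a square MPIAIJ matrix whose row and
   column layouts match, using a vector with the matching parallel layout:

      Vec d;
      MatGetVecs(A,NULL,&d);
      MatGetDiagonal(A,d);
*/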
#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_Redundant"
PetscErrorCode MatDestroy_Redundant(Mat_Redundant **redundant)
{
  PetscErrorCode ierr;
  Mat_Redundant  *redund = *redundant;
  PetscInt       i;

  PetscFunctionBegin;
  *redundant = NULL;
  if (redund) {
    if (redund->matseq) { /* via MatGetSubMatrices() */
      ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
      ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
      ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
      ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
    } else {
      ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
      ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
      ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
      for (i=0; i<redund->nrecvs; i++) {
        ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
        ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
      }
      ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
    }

    if (redund->psubcomm) {
      ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
    }
    ierr = PetscFree(redund);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatDestroy_Redundant(&aij->redundant);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much buffer space as the largest nonzero count on any process */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);
  /* load up the local column values */
  ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
  PetscFunctionReturn(0);
}
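/*
   Example (a sketch): write an MPIAIJ matrix in the binary format produced above;
   the resulting file can be read back with MatLoad():

      PetscViewer viewer;
      PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_WRITE,&viewer);
      MatView(A,viewer);
      PetscViewerDestroy(&viewer);
*/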
#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto the first processor */
*/ 1399 Mat A; 1400 Mat_SeqAIJ *Aloc; 1401 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1402 MatScalar *a; 1403 1404 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1405 if (!rank) { 1406 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1407 } else { 1408 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1409 } 1410 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1411 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1412 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1413 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1414 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1415 1416 /* copy over the A part */ 1417 Aloc = (Mat_SeqAIJ*)aij->A->data; 1418 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1419 row = mat->rmap->rstart; 1420 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1421 for (i=0; i<m; i++) { 1422 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1423 row++; 1424 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1425 } 1426 aj = Aloc->j; 1427 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1428 1429 /* copy over the B part */ 1430 Aloc = (Mat_SeqAIJ*)aij->B->data; 1431 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1432 row = mat->rmap->rstart; 1433 ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr); 1434 ct = cols; 1435 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1436 for (i=0; i<m; i++) { 1437 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1438 row++; 1439 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1440 } 1441 ierr = PetscFree(ct);CHKERRQ(ierr); 1442 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1443 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1444 /* 1445 Everyone has to call to draw the matrix since the graphics waits are 1446 synchronized across all processors that share the PetscDraw object 1447 */ 1448 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1449 if (!rank) { 1450 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1451 } 1452 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1453 ierr = MatDestroy(&A);CHKERRQ(ierr); 1454 } 1455 PetscFunctionReturn(0); 1456 } 1457 1458 #undef __FUNCT__ 1459 #define __FUNCT__ "MatView_MPIAIJ" 1460 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1461 { 1462 PetscErrorCode ierr; 1463 PetscBool iascii,isdraw,issocket,isbinary; 1464 1465 PetscFunctionBegin; 1466 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1467 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1468 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1469 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1470 if (iascii || isdraw || isbinary || issocket) { 1471 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1472 } 1473 PetscFunctionReturn(0); 1474 } 1475 1476 #undef __FUNCT__ 1477 #define __FUNCT__ "MatSOR_MPIAIJ" 1478 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1479 { 1480 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1481 PetscErrorCode ierr; 1482 Vec bb1 = 0; 1483 PetscBool hasop; 1484 1485 PetscFunctionBegin; 1486 if (flag == SOR_APPLY_UPPER) { 1487 ierr 
= (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1488 PetscFunctionReturn(0); 1489 } 1490 1491 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1492 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1493 } 1494 1495 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1496 if (flag & SOR_ZERO_INITIAL_GUESS) { 1497 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1498 its--; 1499 } 1500 1501 while (its--) { 1502 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1503 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1504 1505 /* update rhs: bb1 = bb - B*x */ 1506 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1507 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1508 1509 /* local sweep */ 1510 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1511 } 1512 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1513 if (flag & SOR_ZERO_INITIAL_GUESS) { 1514 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1515 its--; 1516 } 1517 while (its--) { 1518 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1519 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1520 1521 /* update rhs: bb1 = bb - B*x */ 1522 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1523 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1524 1525 /* local sweep */ 1526 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1527 } 1528 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1529 if (flag & SOR_ZERO_INITIAL_GUESS) { 1530 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1531 its--; 1532 } 1533 while (its--) { 1534 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1535 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1536 1537 /* update rhs: bb1 = bb - B*x */ 1538 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1539 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1540 1541 /* local sweep */ 1542 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1543 } 1544 } else if (flag & SOR_EISENSTAT) { 1545 Vec xx1; 1546 1547 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1548 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1549 1550 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1551 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1552 if (!mat->diag) { 1553 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1554 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1555 } 1556 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1557 if (hasop) { 1558 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1559 } else { 1560 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1561 } 1562 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1563 1564 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1565 1566 /* local sweep */ 1567 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | 
SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1568 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1569 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1570 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1571 1572 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1573 PetscFunctionReturn(0); 1574 } 1575 1576 #undef __FUNCT__ 1577 #define __FUNCT__ "MatPermute_MPIAIJ" 1578 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1579 { 1580 Mat aA,aB,Aperm; 1581 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1582 PetscScalar *aa,*ba; 1583 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1584 PetscSF rowsf,sf; 1585 IS parcolp = NULL; 1586 PetscBool done; 1587 PetscErrorCode ierr; 1588 1589 PetscFunctionBegin; 1590 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1591 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1592 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1593 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1594 1595 /* Invert row permutation to find out where my rows should go */ 1596 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1597 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1598 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1599 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1600 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1601 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1602 1603 /* Invert column permutation to find out where my columns should go */ 1604 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1605 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1606 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1607 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1608 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1609 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1610 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1611 1612 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1613 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1614 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1615 1616 /* Find out where my gcols should go */ 1617 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1618 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1619 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1620 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1621 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1622 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1623 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1624 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1625 1626 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1627 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1628 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1629 for (i=0; i<m; i++) { 1630 PetscInt row = rdest[i],rowner; 1631 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1632 for (j=ai[i]; j<ai[i+1]; j++) { 1633 PetscInt cowner,col = cdest[aj[j]]; 1634 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1635 if (rowner == cowner) dnnz[i]++; 1636 
else onnz[i]++; 1637 } 1638 for (j=bi[i]; j<bi[i+1]; j++) { 1639 PetscInt cowner,col = gcdest[bj[j]]; 1640 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1641 if (rowner == cowner) dnnz[i]++; 1642 else onnz[i]++; 1643 } 1644 } 1645 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1646 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1647 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1648 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1649 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1650 1651 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1652 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1653 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1654 for (i=0; i<m; i++) { 1655 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1656 PetscInt j0,rowlen; 1657 rowlen = ai[i+1] - ai[i]; 1658 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1659 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1660 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1661 } 1662 rowlen = bi[i+1] - bi[i]; 1663 for (j0=j=0; j<rowlen; j0=j) { 1664 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1665 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1666 } 1667 } 1668 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1669 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1670 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1671 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1672 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1673 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1674 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1675 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1676 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1677 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1678 *B = Aperm; 1679 PetscFunctionReturn(0); 1680 } 1681 1682 #undef __FUNCT__ 1683 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1684 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1685 { 1686 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1687 Mat A = mat->A,B = mat->B; 1688 PetscErrorCode ierr; 1689 PetscReal isend[5],irecv[5]; 1690 1691 PetscFunctionBegin; 1692 info->block_size = 1.0; 1693 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1694 1695 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1696 isend[3] = info->memory; isend[4] = info->mallocs; 1697 1698 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1699 1700 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1701 isend[3] += info->memory; isend[4] += info->mallocs; 1702 if (flag == MAT_LOCAL) { 1703 info->nz_used = isend[0]; 1704 info->nz_allocated = isend[1]; 1705 info->nz_unneeded = isend[2]; 1706 info->memory = isend[3]; 1707 info->mallocs = isend[4]; 1708 } else if (flag == MAT_GLOBAL_MAX) { 1709 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1710 1711 info->nz_used = irecv[0]; 1712 info->nz_allocated = irecv[1]; 1713 info->nz_unneeded = irecv[2]; 1714 info->memory = irecv[3]; 1715 info->mallocs = irecv[4]; 1716 } else 
if (flag == MAT_GLOBAL_SUM) { 1717 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1718 1719 info->nz_used = irecv[0]; 1720 info->nz_allocated = irecv[1]; 1721 info->nz_unneeded = irecv[2]; 1722 info->memory = irecv[3]; 1723 info->mallocs = irecv[4]; 1724 } 1725 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1726 info->fill_ratio_needed = 0; 1727 info->factor_mallocs = 0; 1728 PetscFunctionReturn(0); 1729 } 1730 1731 #undef __FUNCT__ 1732 #define __FUNCT__ "MatSetOption_MPIAIJ" 1733 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1734 { 1735 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1736 PetscErrorCode ierr; 1737 1738 PetscFunctionBegin; 1739 switch (op) { 1740 case MAT_NEW_NONZERO_LOCATIONS: 1741 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1742 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1743 case MAT_KEEP_NONZERO_PATTERN: 1744 case MAT_NEW_NONZERO_LOCATION_ERR: 1745 case MAT_USE_INODES: 1746 case MAT_IGNORE_ZERO_ENTRIES: 1747 MatCheckPreallocated(A,1); 1748 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1749 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1750 break; 1751 case MAT_ROW_ORIENTED: 1752 a->roworiented = flg; 1753 1754 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1755 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1756 break; 1757 case MAT_NEW_DIAGONALS: 1758 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1759 break; 1760 case MAT_IGNORE_OFF_PROC_ENTRIES: 1761 a->donotstash = flg; 1762 break; 1763 case MAT_SPD: 1764 A->spd_set = PETSC_TRUE; 1765 A->spd = flg; 1766 if (flg) { 1767 A->symmetric = PETSC_TRUE; 1768 A->structurally_symmetric = PETSC_TRUE; 1769 A->symmetric_set = PETSC_TRUE; 1770 A->structurally_symmetric_set = PETSC_TRUE; 1771 } 1772 break; 1773 case MAT_SYMMETRIC: 1774 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1775 break; 1776 case MAT_STRUCTURALLY_SYMMETRIC: 1777 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1778 break; 1779 case MAT_HERMITIAN: 1780 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1781 break; 1782 case MAT_SYMMETRY_ETERNAL: 1783 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1784 break; 1785 default: 1786 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1787 } 1788 PetscFunctionReturn(0); 1789 } 1790 1791 #undef __FUNCT__ 1792 #define __FUNCT__ "MatGetRow_MPIAIJ" 1793 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1794 { 1795 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1796 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1797 PetscErrorCode ierr; 1798 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1799 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1800 PetscInt *cmap,*idx_p; 1801 1802 PetscFunctionBegin; 1803 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1804 mat->getrowactive = PETSC_TRUE; 1805 1806 if (!mat->rowvalues && (idx || v)) { 1807 /* 1808 allocate enough space to hold information from the longest row. 
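         The buffers are shared by every subsequent MatGetRow() on this
         matrix (hence the getrowactive flag), so they are sized once from
         the widest local row; for local row i that width is
            (Aa->i[i+1] - Aa->i[i]) + (Ba->i[i+1] - Ba->i[i]),
         i.e. the diagonal-block and off-diagonal-block nonzero counts
         combined, which is exactly what the scan below computes.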
1809 */ 1810 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1811 PetscInt max = 1,tmp; 1812 for (i=0; i<matin->rmap->n; i++) { 1813 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1814 if (max < tmp) max = tmp; 1815 } 1816 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1817 } 1818 1819 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1820 lrow = row - rstart; 1821 1822 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1823 if (!v) {pvA = 0; pvB = 0;} 1824 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1825 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1826 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1827 nztot = nzA + nzB; 1828 1829 cmap = mat->garray; 1830 if (v || idx) { 1831 if (nztot) { 1832 /* Sort by increasing column numbers, assuming A and B already sorted */ 1833 PetscInt imark = -1; 1834 if (v) { 1835 *v = v_p = mat->rowvalues; 1836 for (i=0; i<nzB; i++) { 1837 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1838 else break; 1839 } 1840 imark = i; 1841 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1842 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1843 } 1844 if (idx) { 1845 *idx = idx_p = mat->rowindices; 1846 if (imark > -1) { 1847 for (i=0; i<imark; i++) { 1848 idx_p[i] = cmap[cworkB[i]]; 1849 } 1850 } else { 1851 for (i=0; i<nzB; i++) { 1852 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1853 else break; 1854 } 1855 imark = i; 1856 } 1857 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1858 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1859 } 1860 } else { 1861 if (idx) *idx = 0; 1862 if (v) *v = 0; 1863 } 1864 } 1865 *nz = nztot; 1866 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1867 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1868 PetscFunctionReturn(0); 1869 } 1870 1871 #undef __FUNCT__ 1872 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1873 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1874 { 1875 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1876 1877 PetscFunctionBegin; 1878 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1879 aij->getrowactive = PETSC_FALSE; 1880 PetscFunctionReturn(0); 1881 } 1882 1883 #undef __FUNCT__ 1884 #define __FUNCT__ "MatNorm_MPIAIJ" 1885 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1886 { 1887 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1888 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1889 PetscErrorCode ierr; 1890 PetscInt i,j,cstart = mat->cmap->rstart; 1891 PetscReal sum = 0.0; 1892 MatScalar *v; 1893 1894 PetscFunctionBegin; 1895 if (aij->size == 1) { 1896 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1897 } else { 1898 if (type == NORM_FROBENIUS) { 1899 v = amat->a; 1900 for (i=0; i<amat->nz; i++) { 1901 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1902 } 1903 v = bmat->a; 1904 for (i=0; i<bmat->nz; i++) { 1905 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1906 } 1907 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1908 *norm = PetscSqrtReal(*norm); 1909 } else if (type == NORM_1) { /* max column norm */ 1910 PetscReal *tmp,*tmp2; 1911 PetscInt *jj,*garray = aij->garray; 1912 ierr = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr); 1913 ierr = 
PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr); 1914 *norm = 0.0; 1915 v = amat->a; jj = amat->j; 1916 for (j=0; j<amat->nz; j++) { 1917 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1918 } 1919 v = bmat->a; jj = bmat->j; 1920 for (j=0; j<bmat->nz; j++) { 1921 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1922 } 1923 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1924 for (j=0; j<mat->cmap->N; j++) { 1925 if (tmp2[j] > *norm) *norm = tmp2[j]; 1926 } 1927 ierr = PetscFree(tmp);CHKERRQ(ierr); 1928 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1929 } else if (type == NORM_INFINITY) { /* max row norm */ 1930 PetscReal ntemp = 0.0; 1931 for (j=0; j<aij->A->rmap->n; j++) { 1932 v = amat->a + amat->i[j]; 1933 sum = 0.0; 1934 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1935 sum += PetscAbsScalar(*v); v++; 1936 } 1937 v = bmat->a + bmat->i[j]; 1938 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1939 sum += PetscAbsScalar(*v); v++; 1940 } 1941 if (sum > ntemp) ntemp = sum; 1942 } 1943 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1944 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1945 } 1946 PetscFunctionReturn(0); 1947 } 1948 1949 #undef __FUNCT__ 1950 #define __FUNCT__ "MatTranspose_MPIAIJ" 1951 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1952 { 1953 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1954 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1955 PetscErrorCode ierr; 1956 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1957 PetscInt cstart = A->cmap->rstart,ncol; 1958 Mat B; 1959 MatScalar *array; 1960 1961 PetscFunctionBegin; 1962 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1963 1964 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1965 ai = Aloc->i; aj = Aloc->j; 1966 bi = Bloc->i; bj = Bloc->j; 1967 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1968 PetscInt *d_nnz,*g_nnz,*o_nnz; 1969 PetscSFNode *oloc; 1970 PETSC_UNUSED PetscSF sf; 1971 1972 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1973 /* compute d_nnz for preallocation */ 1974 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1975 for (i=0; i<ai[ma]; i++) { 1976 d_nnz[aj[i]]++; 1977 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1978 } 1979 /* compute local off-diagonal contributions */ 1980 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1981 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1982 /* map those to global */ 1983 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1984 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1985 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1986 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1987 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1988 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1989 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1990 1991 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1992 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1993 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1994 ierr = 
MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
  } else {
    B    = *matout;
    ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
  }

  /* copy over the A part */
  array = Aloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<ma; i++) {
    ncol = ai[i+1]-ai[i];
    ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    array += ncol; aj += ncol;
  }
  aj = Aloc->j;
  for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */

  /* copy over the B part */
  ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
  array = Bloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    array += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a    = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation.
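       The scatter of rr into aij->lvec is only begun here; the left scaling
       of the off-diagonal block and the scaling of the diagonal block are
       done while those messages are in flight, and the matching
       VecScatterEnd() below completes the scatter just before the ghosted
       values are needed to right-scale b.  For reference, the user-level
       operation implemented by this routine is
          MatDiagonalScale(mat,ll,rr)   which forms diag(ll)*mat*diag(rr).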
*/ 2054 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2055 } 2056 if (ll) { 2057 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2058 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2059 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2060 } 2061 /* scale the diagonal block */ 2062 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2063 2064 if (rr) { 2065 /* Do a scatter end and then right scale the off-diagonal block */ 2066 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2067 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2068 } 2069 PetscFunctionReturn(0); 2070 } 2071 2072 #undef __FUNCT__ 2073 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2074 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2075 { 2076 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2077 PetscErrorCode ierr; 2078 2079 PetscFunctionBegin; 2080 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2081 PetscFunctionReturn(0); 2082 } 2083 2084 #undef __FUNCT__ 2085 #define __FUNCT__ "MatEqual_MPIAIJ" 2086 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2087 { 2088 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2089 Mat a,b,c,d; 2090 PetscBool flg; 2091 PetscErrorCode ierr; 2092 2093 PetscFunctionBegin; 2094 a = matA->A; b = matA->B; 2095 c = matB->A; d = matB->B; 2096 2097 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2098 if (flg) { 2099 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2100 } 2101 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2102 PetscFunctionReturn(0); 2103 } 2104 2105 #undef __FUNCT__ 2106 #define __FUNCT__ "MatCopy_MPIAIJ" 2107 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2108 { 2109 PetscErrorCode ierr; 2110 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2111 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2112 2113 PetscFunctionBegin; 2114 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2115 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2116 /* because of the column compression in the off-processor part of the matrix a->B, 2117 the number of columns in a->B and b->B may be different, hence we cannot call 2118 the MatCopy() directly on the two parts. If need be, we can provide a more 2119 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2120 then copying the submatrices */ 2121 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2122 } else { 2123 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2124 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2125 } 2126 PetscFunctionReturn(0); 2127 } 2128 2129 #undef __FUNCT__ 2130 #define __FUNCT__ "MatSetUp_MPIAIJ" 2131 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2132 { 2133 PetscErrorCode ierr; 2134 2135 PetscFunctionBegin; 2136 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2137 PetscFunctionReturn(0); 2138 } 2139 2140 /* 2141 Computes the number of nonzeros per row needed for preallocation when X and Y 2142 have different nonzero structure. 
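    The count for each row is the size of the set union of the (already
    sorted) column lists of X and Y, obtained by the two-pointer merge in the
    loop below.  For example, if a row of X has global columns {1,4,7} and
    the same row of Y has {4,5}, the union {1,4,5,7} gives nnz[row] = 4.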
2143 */ 2144 #undef __FUNCT__ 2145 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private" 2146 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2147 { 2148 PetscInt i,j,k,nzx,nzy; 2149 2150 PetscFunctionBegin; 2151 /* Set the number of nonzeros in the new matrix */ 2152 for (i=0; i<m; i++) { 2153 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2154 nzx = xi[i+1] - xi[i]; 2155 nzy = yi[i+1] - yi[i]; 2156 nnz[i] = 0; 2157 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2158 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2159 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2160 nnz[i]++; 2161 } 2162 for (; k<nzy; k++) nnz[i]++; 2163 } 2164 PetscFunctionReturn(0); 2165 } 2166 2167 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2168 #undef __FUNCT__ 2169 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2170 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2171 { 2172 PetscErrorCode ierr; 2173 PetscInt m = Y->rmap->N; 2174 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2175 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2176 2177 PetscFunctionBegin; 2178 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2179 PetscFunctionReturn(0); 2180 } 2181 2182 #undef __FUNCT__ 2183 #define __FUNCT__ "MatAXPY_MPIAIJ" 2184 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2185 { 2186 PetscErrorCode ierr; 2187 PetscInt i; 2188 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2189 PetscBLASInt bnz,one=1; 2190 Mat_SeqAIJ *x,*y; 2191 2192 PetscFunctionBegin; 2193 if (str == SAME_NONZERO_PATTERN) { 2194 PetscScalar alpha = a; 2195 x = (Mat_SeqAIJ*)xx->A->data; 2196 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2197 y = (Mat_SeqAIJ*)yy->A->data; 2198 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2199 x = (Mat_SeqAIJ*)xx->B->data; 2200 y = (Mat_SeqAIJ*)yy->B->data; 2201 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2202 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2203 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2204 } else if (str == SUBSET_NONZERO_PATTERN) { 2205 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2206 2207 x = (Mat_SeqAIJ*)xx->B->data; 2208 y = (Mat_SeqAIJ*)yy->B->data; 2209 if (y->xtoy && y->XtoY != xx->B) { 2210 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2211 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2212 } 2213 if (!y->xtoy) { /* get xtoy */ 2214 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2215 y->XtoY = xx->B; 2216 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2217 } 2218 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2219 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2220 } else { 2221 Mat B; 2222 PetscInt *nnz_d,*nnz_o; 2223 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2224 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2225 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2226 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2227 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2228 
ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatConjugate_SeqAIJ(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatConjugate_MPIAIJ"
PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPIAIJ"
PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIAIJ"
PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_PBGL)

#include <boost/parallel/mpi/bsp_process_group.hpp>
#include <boost/graph/distributed/ilu_default_graph.hpp>
#include <boost/graph/distributed/ilu_0_block.hpp>
#include <boost/graph/distributed/ilu_preconditioner.hpp>
#include <boost/graph/distributed/petsc/interface.hpp>
#include <boost/multi_array.hpp>
#include <boost/parallel/distributed_property_map.hpp>

#undef __FUNCT__
#define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
/*
  This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
*/
PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
{
  namespace petsc = boost::distributed::petsc;

  namespace graph_dist = boost::graph::distributed;
  using boost::graph::distributed::ilu_default::process_group_type;
  using boost::graph::ilu_permuted;

  PetscBool      row_identity, col_identity;
  PetscContainer c;
  PetscInt       m, n, M, N;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
  ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
  ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
  if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");

  process_group_type pg;
  typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
  lgraph_type *lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg,
petsc::matrix_distribution(A, pg)); 2323 lgraph_type& level_graph = *lgraph_p; 2324 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2325 2326 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2327 ilu_permuted(level_graph); 2328 2329 /* put together the new matrix */ 2330 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2331 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2332 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2333 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2334 ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr); 2335 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2336 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2337 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2338 2339 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2340 ierr = PetscContainerSetPointer(c, lgraph_p); 2341 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2342 ierr = PetscContainerDestroy(&c); 2343 PetscFunctionReturn(0); 2344 } 2345 2346 #undef __FUNCT__ 2347 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2348 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2349 { 2350 PetscFunctionBegin; 2351 PetscFunctionReturn(0); 2352 } 2353 2354 #undef __FUNCT__ 2355 #define __FUNCT__ "MatSolve_MPIAIJ" 2356 /* 2357 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2358 */ 2359 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2360 { 2361 namespace graph_dist = boost::graph::distributed; 2362 2363 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2364 lgraph_type *lgraph_p; 2365 PetscContainer c; 2366 PetscErrorCode ierr; 2367 2368 PetscFunctionBegin; 2369 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2370 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2371 ierr = VecCopy(b, x);CHKERRQ(ierr); 2372 2373 PetscScalar *array_x; 2374 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2375 PetscInt sx; 2376 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2377 2378 PetscScalar *array_b; 2379 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2380 PetscInt sb; 2381 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2382 2383 lgraph_type& level_graph = *lgraph_p; 2384 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2385 2386 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2387 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2388 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2389 2390 typedef boost::iterator_property_map<array_ref_type::iterator, 2391 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2392 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2393 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2394 2395 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2396 PetscFunctionReturn(0); 2397 } 2398 #endif 2399 2400 2401 #undef __FUNCT__ 2402 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced" 2403 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2404 { 2405 PetscMPIInt rank,size; 2406 MPI_Comm comm; 2407 PetscErrorCode ierr; 2408 PetscInt nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N; 2409 PetscMPIInt *send_rank= NULL,*recv_rank=NULL,subrank,subsize; 2410 PetscInt *rowrange = 
mat->rmap->range; 2411 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2412 Mat A = aij->A,B=aij->B,C=*matredundant; 2413 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2414 PetscScalar *sbuf_a; 2415 PetscInt nzlocal=a->nz+b->nz; 2416 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2417 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray; 2418 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2419 MatScalar *aworkA,*aworkB; 2420 PetscScalar *vals; 2421 PetscMPIInt tag1,tag2,tag3,imdex; 2422 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2423 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2424 MPI_Status recv_status,*send_status; 2425 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2426 PetscInt **rbuf_j=NULL; 2427 PetscScalar **rbuf_a=NULL; 2428 Mat_Redundant *redund =NULL; 2429 2430 PetscFunctionBegin; 2431 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2432 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2433 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2434 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2435 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2436 2437 if (reuse == MAT_REUSE_MATRIX) { 2438 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2439 if (subsize == 1) { 2440 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2441 redund = c->redundant; 2442 } else { 2443 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2444 redund = c->redundant; 2445 } 2446 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2447 2448 nsends = redund->nsends; 2449 nrecvs = redund->nrecvs; 2450 send_rank = redund->send_rank; 2451 recv_rank = redund->recv_rank; 2452 sbuf_nz = redund->sbuf_nz; 2453 rbuf_nz = redund->rbuf_nz; 2454 sbuf_j = redund->sbuf_j; 2455 sbuf_a = redund->sbuf_a; 2456 rbuf_j = redund->rbuf_j; 2457 rbuf_a = redund->rbuf_a; 2458 } 2459 2460 if (reuse == MAT_INITIAL_MATRIX) { 2461 PetscInt nleftover,np_subcomm; 2462 2463 /* get the destination processors' id send_rank, nsends and nrecvs */ 2464 ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr); 2465 2466 np_subcomm = size/nsubcomm; 2467 nleftover = size - nsubcomm*np_subcomm; 2468 2469 /* block of codes below is specific for INTERLACED */ 2470 /* ------------------------------------------------*/ 2471 nsends = 0; nrecvs = 0; 2472 for (i=0; i<size; i++) { 2473 if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */ 2474 send_rank[nsends++] = i; 2475 recv_rank[nrecvs++] = i; 2476 } 2477 } 2478 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2479 i = size-nleftover-1; 2480 j = 0; 2481 while (j < nsubcomm - nleftover) { 2482 send_rank[nsends++] = i; 2483 i--; j++; 2484 } 2485 } 2486 2487 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2488 for (i=0; i<nleftover; i++) { 2489 recv_rank[nrecvs++] = size-nleftover+i; 2490 } 2491 } 2492 /*----------------------------------------------*/ 2493 2494 /* allocate sbuf_j, sbuf_a */ 2495 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2496 ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr); 2497 ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr); 2498 /* 2499 ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr); 2500 ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr); 2501 */ 
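    /*
       Layout of the send buffers: sbuf_j holds a CSR-style row pointer
       (rend-rstart+1 entries) followed by the nzlocal column indices, and
       sbuf_a holds the matching nzlocal values.  The copy loops below pack
       each row as lower off-diagonal, diagonal-block, then upper
       off-diagonal entries, so the columns of every packed row come out
       globally sorted.
    */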
  } /* end of if (reuse == MAT_INITIAL_MATRIX) */

  /* copy mat's local entries into the buffers */
  if (reuse == MAT_INITIAL_MATRIX) {
    rownz_max = 0;
    rptr      = sbuf_j;
    cols      = sbuf_j + rend-rstart + 1;
    vals      = sbuf_a;
    rptr[0]   = 0;
    for (i=0; i<rend-rstart; i++) {
      row = i + rstart;
      nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
      ncols  = nzA + nzB;
      cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
      aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
      /* load the column indices for this row into cols */
      lwrite = 0;
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) < cstart) {
          vals[lwrite]   = aworkB[l];
          cols[lwrite++] = ctmp;
        }
      }
      for (l=0; l<nzA; l++) {
        vals[lwrite]   = aworkA[l];
        cols[lwrite++] = cstart + cworkA[l];
      }
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) >= cend) {
          vals[lwrite]   = aworkB[l];
          cols[lwrite++] = ctmp;
        }
      }
      vals += ncols;
      cols += ncols;
      rptr[i+1] = rptr[i] + ncols;
      if (rownz_max < ncols) rownz_max = ncols;
    }
    if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
  } else { /* only copy matrix values into sbuf_a */
    rptr    = sbuf_j;
    vals    = sbuf_a;
    rptr[0] = 0;
    for (i=0; i<rend-rstart; i++) {
      row = i + rstart;
      nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
      ncols  = nzA + nzB;
      cworkB = b->j + b->i[i];
      aworkA = a->a + a->i[i];
      aworkB = b->a + b->i[i];
      lwrite = 0;
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
      }
      for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
      }
      vals += ncols;
      rptr[i+1] = rptr[i] + ncols;
    }
  } /* end of if/else (reuse == MAT_INITIAL_MATRIX) */

  /* send nzlocal to others, and recv others' nzlocal */
  /*--------------------------------------------------*/
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);

    s_waits2 = s_waits3 + nsends;
    s_waits1 = s_waits2 + nsends;
    r_waits1 = s_waits1 + nsends;
    r_waits2 = r_waits1 + nrecvs;
    r_waits3 = r_waits2 + nrecvs;
  } else {
    ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);

    r_waits3 = s_waits3 + nsends;
  }

  ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    /* get new tags to keep the communication clean */
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
    ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr);

    /* post receives of others' nzlocal */
    for (i=0; i<nrecvs; i++) {
      ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
    }
    /* send nzlocal to others */
    for (i=0; i<nsends; i++) {
      sbuf_nz[i] = nzlocal;
      ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
    }
    /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
    count = nrecvs;
    while (count) {
      ierr =
MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);

      recv_rank[imdex] = recv_status.MPI_SOURCE;
      /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
      ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr);

      i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */

      rbuf_nz[imdex] += i + 2;

      ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr);
      ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
      count--;
    }
    /* wait on sends of nzlocal */
    if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
    /* send mat->i,j to others, and recv from others */
    /*-----------------------------------------------*/
    for (i=0; i<nsends; i++) {
      j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
      ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
    }
    /* wait on receives of mat->i,j */
    /*------------------------------*/
    count = nrecvs;
    while (count) {
      ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
      if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
      count--;
    }
    /* wait on sends of mat->i,j */
    /*---------------------------*/
    if (nsends) {
      ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
    }
  } /* end of if (reuse == MAT_INITIAL_MATRIX) */

  /* post receives, send and receive mat->a */
  /*----------------------------------------*/
  for (imdex=0; imdex<nrecvs; imdex++) {
    ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
  }
  for (i=0; i<nsends; i++) {
    ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
  }
  count = nrecvs;
  while (count) {
    ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
    if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
    count--;
  }
  if (nsends) {
    ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
  }

  ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);

  /* create redundant matrix */
  /*-------------------------*/
  if (reuse == MAT_INITIAL_MATRIX) {
    const PetscInt *range;
    PetscInt       rstart_sub,rend_sub,mloc_sub;

    /* compute rownz_max for preallocation */
    for (imdex=0; imdex<nrecvs; imdex++) {
      j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
      rptr = rbuf_j[imdex];
      for (i=0; i<j; i++) {
        ncols = rptr[i+1] - rptr[i];
        if (rownz_max < ncols) rownz_max = ncols;
      }
    }

    ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);

    /* get local size of redundant matrix
       - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED; it works for other types, but may not be efficient!
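         As a concrete example of the interlaced layout (using the same
         convention as the send/recv pairing above, color = rank%nsubcomm and
         subrank = rank/nsubcomm): with size = 4 and nsubcomm = 2, the two
         subcomms are ranks {0,2} and {1,3}; in each subcomm, subrank 0 gets
         the rows of global ranks 0..1 (range[0]..range[2]) and subrank 1
         gets the rows of global ranks 2..3 (range[2]..mat->rmap->N), so each
         subcomm covers every global row exactly once.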
*/ 2677 ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr); 2678 rstart_sub = range[nsubcomm*subrank]; 2679 if (subrank+1 < subsize) { /* not the last proc in subcomm */ 2680 rend_sub = range[nsubcomm*(subrank+1)]; 2681 } else { 2682 rend_sub = mat->rmap->N; 2683 } 2684 mloc_sub = rend_sub - rstart_sub; 2685 2686 if (M == N) { 2687 ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2688 } else { /* non-square matrix */ 2689 ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr); 2690 } 2691 ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr); 2692 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2693 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2694 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2695 } else { 2696 C = *matredundant; 2697 } 2698 2699 /* insert local matrix entries */ 2700 rptr = sbuf_j; 2701 cols = sbuf_j + rend-rstart + 1; 2702 vals = sbuf_a; 2703 for (i=0; i<rend-rstart; i++) { 2704 row = i + rstart; 2705 ncols = rptr[i+1] - rptr[i]; 2706 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2707 vals += ncols; 2708 cols += ncols; 2709 } 2710 /* insert received matrix entries */ 2711 for (imdex=0; imdex<nrecvs; imdex++) { 2712 rstart = rowrange[recv_rank[imdex]]; 2713 rend = rowrange[recv_rank[imdex]+1]; 2714 /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */ 2715 rptr = rbuf_j[imdex]; 2716 cols = rbuf_j[imdex] + rend-rstart + 1; 2717 vals = rbuf_a[imdex]; 2718 for (i=0; i<rend-rstart; i++) { 2719 row = i + rstart; 2720 ncols = rptr[i+1] - rptr[i]; 2721 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2722 vals += ncols; 2723 cols += ncols; 2724 } 2725 } 2726 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2727 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2728 2729 if (reuse == MAT_INITIAL_MATRIX) { 2730 *matredundant = C; 2731 2732 /* create a supporting struct and attach it to C for reuse */ 2733 ierr = PetscNewLog(C,&redund);CHKERRQ(ierr); 2734 if (subsize == 1) { 2735 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2736 c->redundant = redund; 2737 } else { 2738 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2739 c->redundant = redund; 2740 } 2741 2742 redund->nzlocal = nzlocal; 2743 redund->nsends = nsends; 2744 redund->nrecvs = nrecvs; 2745 redund->send_rank = send_rank; 2746 redund->recv_rank = recv_rank; 2747 redund->sbuf_nz = sbuf_nz; 2748 redund->rbuf_nz = rbuf_nz; 2749 redund->sbuf_j = sbuf_j; 2750 redund->sbuf_a = sbuf_a; 2751 redund->rbuf_j = rbuf_j; 2752 redund->rbuf_a = rbuf_a; 2753 redund->psubcomm = NULL; 2754 } 2755 PetscFunctionReturn(0); 2756 } 2757 2758 #undef __FUNCT__ 2759 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2760 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2761 { 2762 PetscErrorCode ierr; 2763 MPI_Comm comm; 2764 PetscMPIInt size,subsize; 2765 PetscInt mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N; 2766 Mat_Redundant *redund=NULL; 2767 PetscSubcomm psubcomm=NULL; 2768 MPI_Comm subcomm_in=subcomm; 2769 Mat *matseq; 2770 IS isrow,iscol; 2771 2772 PetscFunctionBegin; 2773 if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */ 2774 if (reuse == MAT_INITIAL_MATRIX) { 2775 /* create psubcomm, then get subcomm */ 2776 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2777 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2778 if (nsubcomm < 1 || nsubcomm 
> size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must be between 1 and %D",size);

      ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
      ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
      ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
      ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
      subcomm = psubcomm->comm;
    } else { /* retrieve psubcomm and subcomm */
      ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr);
      ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
      if (subsize == 1) {
        Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
        redund = c->redundant;
      } else {
        Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
        redund = c->redundant;
      }
      psubcomm = redund->psubcomm;
    }
    if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
      ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr);
      if (reuse == MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_Redundant() */
        ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
        if (subsize == 1) {
          Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
          c->redundant->psubcomm = psubcomm;
        } else {
          Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
          c->redundant->psubcomm = psubcomm;
        }
      }
      PetscFunctionReturn(0);
    }
  }

  /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */
  ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    /* create a local sequential matrix matseq[0] */
    mloc_sub = PETSC_DECIDE;
    ierr     = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr);
    ierr     = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
    rstart   = rend - mloc_sub;
    ierr     = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
    ierr     = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
  } else { /* reuse == MAT_REUSE_MATRIX */
    if (subsize == 1) {
      Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
      redund = c->redundant;
    } else {
      Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
      redund = c->redundant;
    }

    isrow  = redund->isrow;
    iscol  = redund->iscol;
    matseq = redund->matseq;
  }
  ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
  ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);

  if (reuse == MAT_INITIAL_MATRIX) {
    /* create a supporting struct and attach it to *matredundant for reuse */
    ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr);
    if (subsize == 1) {
      Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
      c->redundant = redund;
    } else {
      Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
      c->redundant = redund;
    }
    redund->isrow    = isrow;
    redund->iscol    = iscol;
    redund->matseq   = matseq;
    redund->psubcomm = psubcomm;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

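  /*
     Strategy used by this routine and the row min/max variants that follow:
     compute the row-wise result for the diagonal block a->A directly into v,
     compute it for the off-diagonal block a->B into a scratch sequential
     vector, then merge the two elementwise, translating B's local column
     numbers through a->garray back to global columns for idx[].
  */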
PetscFunctionBegin; 2868 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2869 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2870 if (idx) { 2871 for (i=0; i<A->rmap->n; i++) { 2872 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2873 } 2874 } 2875 2876 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2877 if (idx) { 2878 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2879 } 2880 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2881 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2882 2883 for (i=0; i<A->rmap->n; i++) { 2884 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2885 va[i] = vb[i]; 2886 if (idx) idx[i] = a->garray[idxb[i]]; 2887 } 2888 } 2889 2890 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2891 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2892 ierr = PetscFree(idxb);CHKERRQ(ierr); 2893 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2894 PetscFunctionReturn(0); 2895 } 2896 2897 #undef __FUNCT__ 2898 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2899 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2900 { 2901 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2902 PetscErrorCode ierr; 2903 PetscInt i,*idxb = 0; 2904 PetscScalar *va,*vb; 2905 Vec vtmp; 2906 2907 PetscFunctionBegin; 2908 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2909 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2910 if (idx) { 2911 for (i=0; i<A->rmap->n; i++) { 2912 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2913 } 2914 } 2915 2916 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2917 if (idx) { 2918 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2919 } 2920 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2921 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2922 2923 for (i=0; i<A->rmap->n; i++) { 2924 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2925 va[i] = vb[i]; 2926 if (idx) idx[i] = a->garray[idxb[i]]; 2927 } 2928 } 2929 2930 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2931 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2932 ierr = PetscFree(idxb);CHKERRQ(ierr); 2933 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2934 PetscFunctionReturn(0); 2935 } 2936 2937 #undef __FUNCT__ 2938 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2939 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2940 { 2941 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2942 PetscInt n = A->rmap->n; 2943 PetscInt cstart = A->cmap->rstart; 2944 PetscInt *cmap = mat->garray; 2945 PetscInt *diagIdx, *offdiagIdx; 2946 Vec diagV, offdiagV; 2947 PetscScalar *a, *diagA, *offdiagA; 2948 PetscInt r; 2949 PetscErrorCode ierr; 2950 2951 PetscFunctionBegin; 2952 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2953 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2954 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2955 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2956 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2957 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2958 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2959 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2960 for (r = 0; r < n; ++r) { 2961 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2962 a[r] = diagA[r]; 2963 idx[r] = cstart + diagIdx[r]; 2964 } else { 2965 a[r] = offdiagA[r]; 2966 idx[r] = cmap[offdiagIdx[r]]; 2967 } 2968 } 2969 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2970 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2971 ierr = VecRestoreArray(offdiagV,
&offdiagA);CHKERRQ(ierr); 2972 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2973 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2974 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2975 PetscFunctionReturn(0); 2976 } 2977 2978 #undef __FUNCT__ 2979 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2980 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2981 { 2982 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2983 PetscInt n = A->rmap->n; 2984 PetscInt cstart = A->cmap->rstart; 2985 PetscInt *cmap = mat->garray; 2986 PetscInt *diagIdx, *offdiagIdx; 2987 Vec diagV, offdiagV; 2988 PetscScalar *a, *diagA, *offdiagA; 2989 PetscInt r; 2990 PetscErrorCode ierr; 2991 2992 PetscFunctionBegin; 2993 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2994 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2995 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2996 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2997 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2998 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2999 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3000 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3001 for (r = 0; r < n; ++r) { 3002 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 3003 a[r] = diagA[r]; 3004 idx[r] = cstart + diagIdx[r]; 3005 } else { 3006 a[r] = offdiagA[r]; 3007 idx[r] = cmap[offdiagIdx[r]]; 3008 } 3009 } 3010 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3011 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3012 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3013 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3014 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3015 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3016 PetscFunctionReturn(0); 3017 } 3018 3019 #undef __FUNCT__ 3020 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3021 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3022 { 3023 PetscErrorCode ierr; 3024 Mat *dummy; 3025 3026 PetscFunctionBegin; 3027 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3028 *newmat = *dummy; 3029 ierr = PetscFree(dummy);CHKERRQ(ierr); 3030 PetscFunctionReturn(0); 3031 } 3032 3033 #undef __FUNCT__ 3034 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3035 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3036 { 3037 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3038 PetscErrorCode ierr; 3039 3040 PetscFunctionBegin; 3041 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3042 PetscFunctionReturn(0); 3043 } 3044 3045 #undef __FUNCT__ 3046 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3047 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3048 { 3049 PetscErrorCode ierr; 3050 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3051 3052 PetscFunctionBegin; 3053 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3054 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3055 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3056 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3057 PetscFunctionReturn(0); 3058 } 3059 3060 /* -------------------------------------------------------------------*/ 3061 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3062 MatGetRow_MPIAIJ, 3063 MatRestoreRow_MPIAIJ, 3064 MatMult_MPIAIJ, 3065 /* 4*/ MatMultAdd_MPIAIJ, 3066 MatMultTranspose_MPIAIJ, 3067 MatMultTransposeAdd_MPIAIJ, 3068 #if defined(PETSC_HAVE_PBGL) 3069 MatSolve_MPIAIJ, 3070 #else 3071 0, 3072 #endif 3073 0, 3074 
0, 3075 /*10*/ 0, 3076 0, 3077 0, 3078 MatSOR_MPIAIJ, 3079 MatTranspose_MPIAIJ, 3080 /*15*/ MatGetInfo_MPIAIJ, 3081 MatEqual_MPIAIJ, 3082 MatGetDiagonal_MPIAIJ, 3083 MatDiagonalScale_MPIAIJ, 3084 MatNorm_MPIAIJ, 3085 /*20*/ MatAssemblyBegin_MPIAIJ, 3086 MatAssemblyEnd_MPIAIJ, 3087 MatSetOption_MPIAIJ, 3088 MatZeroEntries_MPIAIJ, 3089 /*24*/ MatZeroRows_MPIAIJ, 3090 0, 3091 #if defined(PETSC_HAVE_PBGL) 3092 0, 3093 #else 3094 0, 3095 #endif 3096 0, 3097 0, 3098 /*29*/ MatSetUp_MPIAIJ, 3099 #if defined(PETSC_HAVE_PBGL) 3100 0, 3101 #else 3102 0, 3103 #endif 3104 0, 3105 0, 3106 0, 3107 /*34*/ MatDuplicate_MPIAIJ, 3108 0, 3109 0, 3110 0, 3111 0, 3112 /*39*/ MatAXPY_MPIAIJ, 3113 MatGetSubMatrices_MPIAIJ, 3114 MatIncreaseOverlap_MPIAIJ, 3115 MatGetValues_MPIAIJ, 3116 MatCopy_MPIAIJ, 3117 /*44*/ MatGetRowMax_MPIAIJ, 3118 MatScale_MPIAIJ, 3119 0, 3120 MatDiagonalSet_MPIAIJ, 3121 MatZeroRowsColumns_MPIAIJ, 3122 /*49*/ MatSetRandom_MPIAIJ, 3123 0, 3124 0, 3125 0, 3126 0, 3127 /*54*/ MatFDColoringCreate_MPIXAIJ, 3128 0, 3129 MatSetUnfactored_MPIAIJ, 3130 MatPermute_MPIAIJ, 3131 0, 3132 /*59*/ MatGetSubMatrix_MPIAIJ, 3133 MatDestroy_MPIAIJ, 3134 MatView_MPIAIJ, 3135 0, 3136 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3137 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3138 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3139 0, 3140 0, 3141 0, 3142 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3143 MatGetRowMinAbs_MPIAIJ, 3144 0, 3145 MatSetColoring_MPIAIJ, 3146 0, 3147 MatSetValuesAdifor_MPIAIJ, 3148 /*75*/ MatFDColoringApply_AIJ, 3149 0, 3150 0, 3151 0, 3152 MatFindZeroDiagonals_MPIAIJ, 3153 /*80*/ 0, 3154 0, 3155 0, 3156 /*83*/ MatLoad_MPIAIJ, 3157 0, 3158 0, 3159 0, 3160 0, 3161 0, 3162 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3163 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3164 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3165 MatPtAP_MPIAIJ_MPIAIJ, 3166 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3167 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3168 0, 3169 0, 3170 0, 3171 0, 3172 /*99*/ 0, 3173 0, 3174 0, 3175 MatConjugate_MPIAIJ, 3176 0, 3177 /*104*/MatSetValuesRow_MPIAIJ, 3178 MatRealPart_MPIAIJ, 3179 MatImaginaryPart_MPIAIJ, 3180 0, 3181 0, 3182 /*109*/0, 3183 MatGetRedundantMatrix_MPIAIJ, 3184 MatGetRowMin_MPIAIJ, 3185 0, 3186 0, 3187 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3188 0, 3189 0, 3190 0, 3191 0, 3192 /*119*/0, 3193 0, 3194 0, 3195 0, 3196 MatGetMultiProcBlock_MPIAIJ, 3197 /*124*/MatFindNonzeroRows_MPIAIJ, 3198 MatGetColumnNorms_MPIAIJ, 3199 MatInvertBlockDiagonal_MPIAIJ, 3200 0, 3201 MatGetSubMatricesParallel_MPIAIJ, 3202 /*129*/0, 3203 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3204 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3205 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3206 0, 3207 /*134*/0, 3208 0, 3209 0, 3210 0, 3211 0, 3212 /*139*/0, 3213 0, 3214 0, 3215 MatFDColoringSetUp_MPIXAIJ 3216 }; 3217 3218 /* ----------------------------------------------------------------------------------------*/ 3219 3220 #undef __FUNCT__ 3221 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3222 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3223 { 3224 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3225 PetscErrorCode ierr; 3226 3227 PetscFunctionBegin; 3228 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3229 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3230 PetscFunctionReturn(0); 3231 } 3232 3233 #undef __FUNCT__ 3234 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3235 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3236 { 3237 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3238 PetscErrorCode ierr; 3239 3240 PetscFunctionBegin; 3241 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3242 
ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3243 PetscFunctionReturn(0); 3244 } 3245 3246 #undef __FUNCT__ 3247 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3248 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3249 { 3250 Mat_MPIAIJ *b; 3251 PetscErrorCode ierr; 3252 3253 PetscFunctionBegin; 3254 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3255 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3256 b = (Mat_MPIAIJ*)B->data; 3257 3258 if (!B->preallocated) { 3259 /* Explicitly create 2 MATSEQAIJ matrices. */ 3260 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3261 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3262 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 3263 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3264 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 3265 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3266 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3267 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 3268 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3269 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 3270 } 3271 3272 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3273 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3274 B->preallocated = PETSC_TRUE; 3275 PetscFunctionReturn(0); 3276 } 3277 3278 #undef __FUNCT__ 3279 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3280 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3281 { 3282 Mat mat; 3283 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3284 PetscErrorCode ierr; 3285 3286 PetscFunctionBegin; 3287 *newmat = 0; 3288 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3289 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3290 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 3291 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3292 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3293 a = (Mat_MPIAIJ*)mat->data; 3294 3295 mat->factortype = matin->factortype; 3296 mat->assembled = PETSC_TRUE; 3297 mat->insertmode = NOT_SET_VALUES; 3298 mat->preallocated = PETSC_TRUE; 3299 3300 a->size = oldmat->size; 3301 a->rank = oldmat->rank; 3302 a->donotstash = oldmat->donotstash; 3303 a->roworiented = oldmat->roworiented; 3304 a->rowindices = 0; 3305 a->rowvalues = 0; 3306 a->getrowactive = PETSC_FALSE; 3307 3308 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3309 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3310 3311 if (oldmat->colmap) { 3312 #if defined(PETSC_USE_CTABLE) 3313 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3314 #else 3315 ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr); 3316 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3317 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3318 #endif 3319 } else a->colmap = 0; 3320 if (oldmat->garray) { 3321 PetscInt len; 3322 len = oldmat->B->cmap->n; 3323 ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr); 3324 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3325 if (len) { ierr = 
PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3326 } else a->garray = 0; 3327 3328 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3329 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3330 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3331 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3332 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3333 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3334 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3335 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3336 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3337 *newmat = mat; 3338 PetscFunctionReturn(0); 3339 } 3340 3341 3342 3343 #undef __FUNCT__ 3344 #define __FUNCT__ "MatLoad_MPIAIJ" 3345 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3346 { 3347 PetscScalar *vals,*svals; 3348 MPI_Comm comm; 3349 PetscErrorCode ierr; 3350 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3351 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3352 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3353 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3354 PetscInt cend,cstart,n,*rowners,sizesset=1; 3355 int fd; 3356 PetscInt bs = 1; 3357 3358 PetscFunctionBegin; 3359 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3360 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3361 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3362 if (!rank) { 3363 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3364 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3365 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object"); 3366 } 3367 3368 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr); 3369 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3370 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3371 3372 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3373 3374 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3375 M = header[1]; N = header[2]; 3376 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3377 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3378 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3379 3380 /* If global sizes are set, check if they are consistent with that given in the file */ 3381 if (sizesset) { 3382 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3383 } 3384 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows); 3385 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3386 3387 /* determine ownership of all (block) rows */ 3388 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3389 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3390 else m = newMat->rmap->n; /* Set by user */ 3391 3392
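/*
   Illustrative worked example of the PETSC_DECIDE branch above: the M/bs block
   rows are split as evenly as possible, with one extra block row going to each
   of the first (M/bs)%size ranks. For instance, with M = 10, bs = 1 and
   size = 3, rank 0 gets 4 rows while ranks 1 and 2 get 3 rows each, which is
   the same layout PetscSplitOwnership() would produce.
*/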
ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 3393 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3394 3395 /* First process needs enough room for process with most rows */ 3396 if (!rank) { 3397 mmax = rowners[1]; 3398 for (i=2; i<=size; i++) { 3399 mmax = PetscMax(mmax, rowners[i]); 3400 } 3401 } else mmax = -1; /* unused, but compilers complain */ 3402 3403 rowners[0] = 0; 3404 for (i=2; i<=size; i++) { 3405 rowners[i] += rowners[i-1]; 3406 } 3407 rstart = rowners[rank]; 3408 rend = rowners[rank+1]; 3409 3410 /* distribute row lengths to all processors */ 3411 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 3412 if (!rank) { 3413 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3414 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3415 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3416 for (j=0; j<m; j++) { 3417 procsnz[0] += ourlens[j]; 3418 } 3419 for (i=1; i<size; i++) { 3420 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3421 /* calculate the number of nonzeros on each processor */ 3422 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3423 procsnz[i] += rowlengths[j]; 3424 } 3425 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3426 } 3427 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3428 } else { 3429 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3430 } 3431 3432 if (!rank) { 3433 /* determine max buffer needed and allocate it */ 3434 maxnz = 0; 3435 for (i=0; i<size; i++) { 3436 maxnz = PetscMax(maxnz,procsnz[i]); 3437 } 3438 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3439 3440 /* read in my part of the matrix column indices */ 3441 nz = procsnz[0]; 3442 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3443 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3444 3445 /* read in everyone else's part and ship it off */ 3446 for (i=1; i<size; i++) { 3447 nz = procsnz[i]; 3448 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3449 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3450 } 3451 ierr = PetscFree(cols);CHKERRQ(ierr); 3452 } else { 3453 /* determine buffer space needed for message */ 3454 nz = 0; 3455 for (i=0; i<m; i++) { 3456 nz += ourlens[i]; 3457 } 3458 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3459 3460 /* receive message of column indices */ 3461 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3462 } 3463 3464 /* determine column ownership if matrix is not square */ 3465 if (N != M) { 3466 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3467 else n = newMat->cmap->n; 3468 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3469 cstart = cend - n; 3470 } else { 3471 cstart = rstart; 3472 cend = rend; 3473 n = cend - cstart; 3474 } 3475 3476 /* loop over local rows, determining number of off diagonal entries */ 3477 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3478 jj = 0; 3479 for (i=0; i<m; i++) { 3480 for (j=0; j<ourlens[i]; j++) { 3481 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3482 jj++; 3483 } 3484 } 3485 3486 for (i=0; i<m; i++) { 3487 ourlens[i] -= offlens[i]; 3488 } 3489 if (!sizesset) { 3490 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3491 } 3492 3493 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3494 3495 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3496 3497 for (i=0; i<m; i++) { 3498 ourlens[i] += offlens[i]; 3499 } 3500 3501 if
(!rank) { 3502 ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr); 3503 3504 /* read in my part of the matrix numerical values */ 3505 nz = procsnz[0]; 3506 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3507 3508 /* insert into matrix */ 3509 jj = rstart; 3510 smycols = mycols; 3511 svals = vals; 3512 for (i=0; i<m; i++) { 3513 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3514 smycols += ourlens[i]; 3515 svals += ourlens[i]; 3516 jj++; 3517 } 3518 3519 /* read in the other processors' parts and ship them out */ 3520 for (i=1; i<size; i++) { 3521 nz = procsnz[i]; 3522 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3523 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3524 } 3525 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3526 } else { 3527 /* receive numeric values */ 3528 ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr); 3529 3530 /* receive message of values */ 3531 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3532 3533 /* insert into matrix */ 3534 jj = rstart; 3535 smycols = mycols; 3536 svals = vals; 3537 for (i=0; i<m; i++) { 3538 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3539 smycols += ourlens[i]; 3540 svals += ourlens[i]; 3541 jj++; 3542 } 3543 } 3544 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3545 ierr = PetscFree(vals);CHKERRQ(ierr); 3546 ierr = PetscFree(mycols);CHKERRQ(ierr); 3547 ierr = PetscFree(rowners);CHKERRQ(ierr); 3548 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3549 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3550 PetscFunctionReturn(0); 3551 } 3552 3553 #undef __FUNCT__ 3554 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3555 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3556 { 3557 PetscErrorCode ierr; 3558 IS iscol_local; 3559 PetscInt csize; 3560 3561 PetscFunctionBegin; 3562 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3563 if (call == MAT_REUSE_MATRIX) { 3564 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3565 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3566 } else { 3567 PetscInt cbs; 3568 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3569 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3570 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3571 } 3572 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3573 if (call == MAT_INITIAL_MATRIX) { 3574 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3575 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3576 } 3577 PetscFunctionReturn(0); 3578 } 3579 3580 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3581 #undef __FUNCT__ 3582 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3583 /* 3584 Not great since it makes two copies of the submatrix: first a SeqAIJ 3585 on each process, and then the final result obtained by concatenating the 3586 local matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3587 3588 Note: This requires a sequential iscol with all indices.
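   For example, the caller MatGetSubMatrix_MPIAIJ() above builds such an iscol
   with ISAllGather(), so that every process passes the complete list of
   requested column indices into this routine.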
3589 */ 3590 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3591 { 3592 PetscErrorCode ierr; 3593 PetscMPIInt rank,size; 3594 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3595 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3596 PetscBool allcolumns, colflag; 3597 Mat M,Mreuse; 3598 MatScalar *vwork,*aa; 3599 MPI_Comm comm; 3600 Mat_SeqAIJ *aij; 3601 3602 PetscFunctionBegin; 3603 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3604 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3605 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3606 3607 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3608 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3609 if (colflag && ncol == mat->cmap->N) { 3610 allcolumns = PETSC_TRUE; 3611 } else { 3612 allcolumns = PETSC_FALSE; 3613 } 3614 if (call == MAT_REUSE_MATRIX) { 3615 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3616 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3617 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3618 } else { 3619 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3620 } 3621 3622 /* 3623 m - number of local rows 3624 n - number of columns (same on all processors) 3625 rstart - first row in new global matrix generated 3626 */ 3627 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3628 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3629 if (call == MAT_INITIAL_MATRIX) { 3630 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3631 ii = aij->i; 3632 jj = aij->j; 3633 3634 /* 3635 Determine the number of non-zeros in the diagonal and off-diagonal 3636 portions of the matrix in order to do correct preallocation 3637 */ 3638 3639 /* first get start and end of "diagonal" columns */ 3640 if (csize == PETSC_DECIDE) { 3641 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3642 if (mglobal == n) { /* square matrix */ 3643 nlocal = m; 3644 } else { 3645 nlocal = n/size + ((n % size) > rank); 3646 } 3647 } else { 3648 nlocal = csize; 3649 } 3650 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3651 rstart = rend - nlocal; 3652 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3653 3654 /* next, compute all the lengths */ 3655 ierr = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr); 3656 olens = dlens + m; 3657 for (i=0; i<m; i++) { 3658 jend = ii[i+1] - ii[i]; 3659 olen = 0; 3660 dlen = 0; 3661 for (j=0; j<jend; j++) { 3662 if (*jj < rstart || *jj >= rend) olen++; 3663 else dlen++; 3664 jj++; 3665 } 3666 olens[i] = olen; 3667 dlens[i] = dlen; 3668 } 3669 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3670 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3671 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3672 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3673 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3674 ierr = PetscFree(dlens);CHKERRQ(ierr); 3675 } else { 3676 PetscInt ml,nl; 3677 3678 M = *newmat; 3679 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3680 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3681 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3682 /* 3683 The next 
two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3684 rather than the slower MatSetValues(). 3685 */ 3686 M->was_assembled = PETSC_TRUE; 3687 M->assembled = PETSC_FALSE; 3688 } 3689 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3690 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3691 ii = aij->i; 3692 jj = aij->j; 3693 aa = aij->a; 3694 for (i=0; i<m; i++) { 3695 row = rstart + i; 3696 nz = ii[i+1] - ii[i]; 3697 cwork = jj; jj += nz; 3698 vwork = aa; aa += nz; 3699 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3700 } 3701 3702 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3703 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3704 *newmat = M; 3705 3706 /* save submatrix used in processor for next request */ 3707 if (call == MAT_INITIAL_MATRIX) { 3708 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3709 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3710 } 3711 PetscFunctionReturn(0); 3712 } 3713 3714 #undef __FUNCT__ 3715 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3716 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3717 { 3718 PetscInt m,cstart,cend,j,nnz,i,d; 3719 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3720 const PetscInt *JJ; 3721 PetscScalar *values; 3722 PetscErrorCode ierr; 3723 3724 PetscFunctionBegin; 3725 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]); 3726 3727 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3728 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3729 m = B->rmap->n; 3730 cstart = B->cmap->rstart; 3731 cend = B->cmap->rend; 3732 rstart = B->rmap->rstart; 3733 3734 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3735 3736 #if defined(PETSC_USE_DEBUG) 3737 for (i=0; i<m; i++) { 3738 nnz = Ii[i+1]- Ii[i]; 3739 JJ = J + Ii[i]; 3740 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz); 3741 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3742 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3743 } 3744 #endif 3745 3746 for (i=0; i<m; i++) { 3747 nnz = Ii[i+1]- Ii[i]; 3748 JJ = J + Ii[i]; 3749 nnz_max = PetscMax(nnz_max,nnz); 3750 d = 0; 3751 for (j=0; j<nnz; j++) { 3752 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3753 } 3754 d_nnz[i] = d; 3755 o_nnz[i] = nnz - d; 3756 } 3757 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3758 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3759 3760 if (v) values = (PetscScalar*)v; 3761 else { 3762 ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr); 3763 } 3764 3765 for (i=0; i<m; i++) { 3766 ii = i + rstart; 3767 nnz = Ii[i+1]- Ii[i]; 3768 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3769 } 3770 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3771 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3772 3773 if (!v) { 3774 ierr = PetscFree(values);CHKERRQ(ierr); 3775 } 3776 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3777 PetscFunctionReturn(0); 3778 } 3779 3780 #undef __FUNCT__ 3781 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3782 /*@ 3783 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3784 (the default parallel PETSc format). 3785 3786 Collective on MPI_Comm 3787 3788 Input Parameters: 3789 + B - the matrix 3790 . i - the indices into j for the start of each local row (starts with zero) 3791 . j - the column indices for each local row (starts with zero) 3792 - v - optional values in the matrix 3793 3794 Level: developer 3795 3796 Notes: 3797 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3798 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3799 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3800 3801 The i and j indices are 0 based, and the i entries are offsets into the local j array. 3802 3803 The format used for the sparse matrix input is equivalent to a 3804 row-major ordering, i.e. for the following matrix, the input data expected is 3805 as shown: 3806 3807 1 0 0 3808 2 0 3 P0 3809 ------- 3810 4 5 6 P1 3811 3812 Process0 [P0]: rows_owned=[0,1] 3813 i = {0,1,3} [size = nrow+1 = 2+1] 3814 j = {0,0,2} [size = nz = 3] 3815 v = {1,2,3} [size = nz = 3] 3816 3817 Process1 [P1]: rows_owned=[2] 3818 i = {0,3} [size = nrow+1 = 1+1] 3819 j = {0,1,2} [size = nz = 3] 3820 v = {4,5,6} [size = nz = 3] 3821 3822 .keywords: matrix, aij, compressed row, sparse, parallel 3823 3824 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3825 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3826 @*/ 3827 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3828 { 3829 PetscErrorCode ierr; 3830 3831 PetscFunctionBegin; 3832 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3833 PetscFunctionReturn(0); 3834 } 3835 3836 #undef __FUNCT__ 3837 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3838 /*@C 3839 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3840 (the default parallel PETSc format). For good matrix assembly performance 3841 the user should preallocate the matrix storage by setting the parameters 3842 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3843 performance can be increased by more than a factor of 50. 3844 3845 Collective on MPI_Comm 3846 3847 Input Parameters: 3848 + B - the matrix 3849 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3850 (same value is used for all local rows) 3851 . d_nnz - array containing the number of nonzeros in the various rows of the 3852 DIAGONAL portion of the local submatrix (possibly different for each row) 3853 or NULL, if d_nz is used to specify the nonzero structure. 3854 The size of this array is equal to the number of local rows, i.e 'm'.
3855 For matrices that will be factored, you must leave room for (and set) 3856 the diagonal entry even if it is zero. 3857 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3858 submatrix (same value is used for all local rows). 3859 - o_nnz - array containing the number of nonzeros in the various rows of the 3860 OFF-DIAGONAL portion of the local submatrix (possibly different for 3861 each row) or NULL, if o_nz is used to specify the nonzero 3862 structure. The size of this array is equal to the number 3863 of local rows, i.e 'm'. 3864 3865 If the *_nnz parameter is given then the *_nz parameter is ignored 3866 3867 The AIJ format (also called the Yale sparse matrix format or 3868 compressed row storage (CSR)) is fully compatible with standard Fortran 77 3869 storage. The stored row and column indices begin with zero. 3870 See Users-Manual: ch_mat for details. 3871 3872 The parallel matrix is partitioned such that the first m0 rows belong to 3873 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3874 to process 2 etc., where m0,m1,m2... are the input parameter 'm'. 3875 3876 The DIAGONAL portion of the local submatrix of a processor can be defined 3877 as the submatrix obtained by extracting the part corresponding to 3878 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3879 first row that belongs to the processor, r2 is the last row belonging to 3880 this processor, and c1-c2 is the range of indices of the local part of a 3881 vector suitable for applying the matrix to. This is an m x n matrix. In the 3882 common case of a square matrix, the row and column ranges are the same and 3883 the DIAGONAL part is also square. The remaining portion of the local 3884 submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion. 3885 3886 If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored. 3887 3888 You can call MatGetInfo() to get information on how effective the preallocation was; 3889 for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded. 3890 You can also run with the option -info and look for messages with the string 3891 malloc in them to see if additional memory allocation was needed. 3892 3893 Example usage: 3894 3895 Consider the following 8x8 matrix with 34 non-zero values, that is 3896 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3897 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3898 as follows: 3899 3900 .vb 3901 1 2 0 | 0 3 0 | 0 4 3902 Proc0 0 5 6 | 7 0 0 | 8 0 3903 9 0 10 | 11 0 0 | 12 0 3904 ------------------------------------- 3905 13 0 14 | 15 16 17 | 0 0 3906 Proc1 0 18 0 | 19 20 21 | 0 0 3907 0 0 0 | 22 23 0 | 24 0 3908 ------------------------------------- 3909 Proc2 25 26 27 | 0 0 28 | 29 0 3910 30 0 0 | 31 32 33 | 0 34 3911 .ve 3912 3913 This can be represented as a collection of submatrices as: 3914 3915 .vb 3916 A B C 3917 D E F 3918 G H I 3919 .ve 3920 3921 Where the submatrices A,B,C are owned by proc0, D,E,F are 3922 owned by proc1, G,H,I are owned by proc2. 3923 3924 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3925 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3926 The 'M','N' parameters are 8,8, and have the same values on all procs. 3927 3928 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3929 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3930 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
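   As an illustrative sketch (not part of the original man page), the
   preallocation call that proc1 would make for this example, using the
   per-row counts worked out below, is:

.vb
      PetscInt d_nnz[3] = {3,3,2},o_nnz[3] = {2,1,1};
      ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve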
3931 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3932 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 3933 matrix, and [DF] as another SeqAIJ matrix. 3934 3935 When d_nz, o_nz parameters are specified, d_nz storage elements are 3936 allocated for every row of the local diagonal submatrix, and o_nz 3937 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3938 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 3939 local row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3940 In this case, the values of d_nz,o_nz are: 3941 .vb 3942 proc0 : d_nz = 2, o_nz = 2 3943 proc1 : d_nz = 3, o_nz = 2 3944 proc2 : d_nz = 1, o_nz = 4 3945 .ve 3946 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3947 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3948 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3949 34 values. 3950 3951 When d_nnz, o_nnz parameters are specified, the storage is specified 3952 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3953 In the above case the values for d_nnz,o_nnz are: 3954 .vb 3955 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3956 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3957 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3958 .ve 3959 Here the space allocated is the sum of all the above values, i.e. 34, and 3960 hence the preallocation is perfect. 3961 3962 Level: intermediate 3963 3964 .keywords: matrix, aij, compressed row, sparse, parallel 3965 3966 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3967 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 3968 @*/ 3969 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3970 { 3971 PetscErrorCode ierr; 3972 3973 PetscFunctionBegin; 3974 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3975 PetscValidType(B,1); 3976 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3977 PetscFunctionReturn(0); 3978 } 3979 3980 #undef __FUNCT__ 3981 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3982 /*@ 3983 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local 3984 rows in standard CSR format. 3985 3986 Collective on MPI_Comm 3987 3988 Input Parameters: 3989 + comm - MPI communicator 3990 . m - number of local rows (Cannot be PETSC_DECIDE) 3991 . n - This value should be the same as the local size used in creating the 3992 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have it 3993 calculated if N is given) For square matrices n is almost always m. 3994 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3995 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3996 . i - row indices 3997 . j - column indices 3998 - a - matrix values 3999 4000 Output Parameter: 4001 . mat - the matrix 4002 4003 Level: intermediate 4004 4005 Notes: 4006 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4007 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4008 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4009 4010 The i and j indices are 0 based, and the i entries are offsets into the local j array.
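   As an illustrative sketch (not part of the original man page), a call on
   each process, using the two-process example data shown below, could look
   like:

.vb
      /* i, j, v are this process's CSR arrays from the example below;
         m is the local row count (2 on P0, 1 on P1) and n the local
         column size (or PETSC_DECIDE) */
      ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,v,&A);CHKERRQ(ierr);
.ve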
4011 4012 The format used for the sparse matrix input is equivalent to a 4013 row-major ordering, i.e. for the following matrix, the input data expected is 4014 as shown: 4015 4016 1 0 0 4017 2 0 3 P0 4018 ------- 4019 4 5 6 P1 4020 4021 Process0 [P0]: rows_owned=[0,1] 4022 i = {0,1,3} [size = nrow+1 = 2+1] 4023 j = {0,0,2} [size = nz = 3] 4024 v = {1,2,3} [size = nz = 3] 4025 4026 Process1 [P1]: rows_owned=[2] 4027 i = {0,3} [size = nrow+1 = 1+1] 4028 j = {0,1,2} [size = nz = 3] 4029 v = {4,5,6} [size = nz = 3] 4030 4031 .keywords: matrix, aij, compressed row, sparse, parallel 4032 4033 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4034 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4035 @*/ 4036 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4037 { 4038 PetscErrorCode ierr; 4039 4040 PetscFunctionBegin; 4041 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4042 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4043 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4044 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4045 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4046 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4047 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4048 PetscFunctionReturn(0); 4049 } 4050 4051 #undef __FUNCT__ 4052 #define __FUNCT__ "MatCreateAIJ" 4053 /*@C 4054 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4055 (the default parallel PETSc format). For good matrix assembly performance 4056 the user should preallocate the matrix storage by setting the parameters 4057 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4058 performance can be increased by more than a factor of 50. 4059 4060 Collective on MPI_Comm 4061 4062 Input Parameters: 4063 + comm - MPI communicator 4064 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4065 This value should be the same as the local size used in creating the 4066 y vector for the matrix-vector product y = Ax. 4067 . n - This value should be the same as the local size used in creating the 4068 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4069 calculated if N is given) For square matrices n is almost always m. 4070 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4071 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4072 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4073 (same value is used for all local rows) 4074 . d_nnz - array containing the number of nonzeros in the various rows of the 4075 DIAGONAL portion of the local submatrix (possibly different for each row) 4076 or NULL, if d_nz is used to specify the nonzero structure. 4077 The size of this array is equal to the number of local rows, i.e 'm'. 4078 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4079 submatrix (same value is used for all local rows). 4080 - o_nnz - array containing the number of nonzeros in the various rows of the 4081 OFF-DIAGONAL portion of the local submatrix (possibly different for 4082 each row) or NULL, if o_nz is used to specify the nonzero structure.
The size of this array is equal to the number 4084 of local rows, i.e 'm'. 4085 4086 Output Parameter: 4087 . A - the matrix 4088 4089 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4090 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4091 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4092 4093 Notes: 4094 If the *_nnz parameter is given then the *_nz parameter is ignored 4095 4096 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4097 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4098 storage requirements for this matrix. 4099 4100 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4101 processor then it must be used on all processors that share the object for 4102 that argument. 4103 4104 The user MUST specify either the local or global matrix dimensions 4105 (possibly both). 4106 4107 The parallel matrix is partitioned across processors such that the 4108 first m0 rows belong to process 0, the next m1 rows belong to 4109 process 1, the next m2 rows belong to process 2 etc., where 4110 m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores 4111 values corresponding to an [m x N] submatrix. 4112 4113 The columns are logically partitioned with the n0 columns belonging 4114 to the 0th partition, the next n1 columns belonging to the next 4115 partition etc., where n0,n1,n2... are the input parameter 'n'. 4116 4117 The DIAGONAL portion of the local submatrix on any given processor 4118 is the submatrix corresponding to the rows and columns m,n 4119 corresponding to the given processor, i.e. the diagonal submatrix on 4120 process 0 is [m0 x n0], on process 1 it is [m1 x n1], 4121 etc. The remaining portion of the local submatrix [m x (N-n)] 4122 constitutes the OFF-DIAGONAL portion. The example below better 4123 illustrates this concept. 4124 4125 For a square global matrix we define each processor's diagonal portion 4126 to be its local rows and the corresponding columns (a square submatrix); 4127 each processor's off-diagonal portion encompasses the remainder of the 4128 local matrix (a rectangular submatrix). 4129 4130 If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored. 4131 4132 When calling this routine with a single process communicator, a matrix of 4133 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4134 type of communicator, use the construction mechanism: 4135 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4136 4137 By default, this format uses inodes (identical nodes) when possible. 4138 We search for consecutive rows with the same nonzero structure, thereby 4139 reusing matrix information to achieve increased efficiency. 4140 4141 Options Database Keys: 4142 + -mat_no_inode - Do not use inodes 4143 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4144 - -mat_aij_oneindex - Internally use indexing starting at 1 4145 rather than 0. Note that when calling MatSetValues(), 4146 the user still MUST index entries starting at 0! 4147 4148 4149 Example usage: 4150 4151 Consider the following 8x8 matrix with 34 non-zero values, that is 4152 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4153 proc1 owns 3 rows, proc2 owns 2 rows.
This division can be shown 4154 as follows: 4155 4156 .vb 4157 1 2 0 | 0 3 0 | 0 4 4158 Proc0 0 5 6 | 7 0 0 | 8 0 4159 9 0 10 | 11 0 0 | 12 0 4160 ------------------------------------- 4161 13 0 14 | 15 16 17 | 0 0 4162 Proc1 0 18 0 | 19 20 21 | 0 0 4163 0 0 0 | 22 23 0 | 24 0 4164 ------------------------------------- 4165 Proc2 25 26 27 | 0 0 28 | 29 0 4166 30 0 0 | 31 32 33 | 0 34 4167 .ve 4168 4169 This can be represented as a collection of submatrices as: 4170 4171 .vb 4172 A B C 4173 D E F 4174 G H I 4175 .ve 4176 4177 Where the submatrices A,B,C are owned by proc0, D,E,F are 4178 owned by proc1, G,H,I are owned by proc2. 4179 4180 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4181 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4182 The 'M','N' parameters are 8,8, and have the same values on all procs. 4183 4184 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4185 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4186 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4187 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4188 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 4189 matrix, and [DF] as another SeqAIJ matrix. 4190 4191 When d_nz, o_nz parameters are specified, d_nz storage elements are 4192 allocated for every row of the local diagonal submatrix, and o_nz 4193 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4194 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 4195 local row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4196 In this case, the values of d_nz,o_nz are: 4197 .vb 4198 proc0 : d_nz = 2, o_nz = 2 4199 proc1 : d_nz = 3, o_nz = 2 4200 proc2 : d_nz = 1, o_nz = 4 4201 .ve 4202 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4203 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4204 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4205 34 values. 4206 4207 When d_nnz, o_nnz parameters are specified, the storage is specified 4208 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4209 In the above case the values for d_nnz,o_nnz are: 4210 .vb 4211 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4212 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4213 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4214 .ve 4215 Here the space allocated is the sum of all the above values, i.e. 34, and 4216 hence the preallocation is perfect.
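   As an illustrative sketch (not part of the original man page), the call
   that proc1 would make for the example above is:

.vb
      PetscInt d_nnz[3] = {3,3,2},o_nnz[3] = {2,1,1};
      ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve

   Each process calls this collectively with its own m, d_nnz and o_nnz.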
4217 4218 Level: intermediate 4219 4220 .keywords: matrix, aij, compressed row, sparse, parallel 4221 4222 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4223 MPIAIJ, MatCreateMPIAIJWithArrays() 4224 @*/ 4225 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4226 { 4227 PetscErrorCode ierr; 4228 PetscMPIInt size; 4229 4230 PetscFunctionBegin; 4231 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4232 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4233 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4234 if (size > 1) { 4235 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4236 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4237 } else { 4238 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4239 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4240 } 4241 PetscFunctionReturn(0); 4242 } 4243 4244 #undef __FUNCT__ 4245 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4246 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4247 { 4248 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4249 4250 PetscFunctionBegin; 4251 if (Ad) *Ad = a->A; 4252 if (Ao) *Ao = a->B; 4253 if (colmap) *colmap = a->garray; 4254 PetscFunctionReturn(0); 4255 } 4256 4257 #undef __FUNCT__ 4258 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4259 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4260 { 4261 PetscErrorCode ierr; 4262 PetscInt i; 4263 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4264 4265 PetscFunctionBegin; 4266 if (coloring->ctype == IS_COLORING_GLOBAL) { 4267 ISColoringValue *allcolors,*colors; 4268 ISColoring ocoloring; 4269 4270 /* set coloring for diagonal portion */ 4271 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4272 4273 /* set coloring for off-diagonal portion */ 4274 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4275 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4276 for (i=0; i<a->B->cmap->n; i++) { 4277 colors[i] = allcolors[a->garray[i]]; 4278 } 4279 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4280 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4281 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4282 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4283 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4284 ISColoringValue *colors; 4285 PetscInt *larray; 4286 ISColoring ocoloring; 4287 4288 /* set coloring for diagonal portion */ 4289 ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr); 4290 for (i=0; i<a->A->cmap->n; i++) { 4291 larray[i] = i + A->cmap->rstart; 4292 } 4293 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4294 ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr); 4295 for (i=0; i<a->A->cmap->n; i++) { 4296 colors[i] = coloring->colors[larray[i]]; 4297 } 4298 ierr = PetscFree(larray);CHKERRQ(ierr); 4299 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4300 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4301 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4302 4303 /* set coloring for off-diagonal portion */ 4304 ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr); 4305 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4306 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4307 for (i=0; i<a->B->cmap->n; i++) { 4308 colors[i] = coloring->colors[larray[i]]; 4309 } 4310 ierr = PetscFree(larray);CHKERRQ(ierr); 4311 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4312 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4313 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4314 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4315 PetscFunctionReturn(0); 4316 } 4317 4318 #undef __FUNCT__ 4319 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4320 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4321 { 4322 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4323 PetscErrorCode ierr; 4324 4325 PetscFunctionBegin; 4326 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4327 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4328 PetscFunctionReturn(0); 4329 } 4330 4331 #undef __FUNCT__ 4332 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4333 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4334 { 4335 PetscErrorCode ierr; 4336 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4337 PetscInt *indx; 4338 4339 PetscFunctionBegin; 4340 /* This routine will ONLY return MPIAIJ type matrix */ 4341 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4342 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4343 if (n == PETSC_DECIDE) { 4344 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4345 } 4346 /* Check sum(n) = N */ 4347 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4348 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4349 4350 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4351 rstart -= m; 4352 4353 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4354 for (i=0; i<m; i++) { 4355 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4356 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4357 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4358 } 4359 4360 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4361 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4362 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4363 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4364 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4365 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4366 PetscFunctionReturn(0); 4367 } 4368 4369 #undef __FUNCT__ 4370 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4371 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4372 { 4373 PetscErrorCode ierr; 4374 PetscInt m,N,i,rstart,nnz,Ii; 4375 PetscInt *indx; 4376 PetscScalar *values; 4377 4378 PetscFunctionBegin; 4379 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4380 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4381 for (i=0; i<m; i++) { 4382 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4383 Ii = i + rstart; 4384 ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4385 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4386 } 4387 ierr = 
MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4388 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4389 PetscFunctionReturn(0); 4390 } 4391 4392 #undef __FUNCT__ 4393 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4394 /*@ 4395 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4396 matrices from each processor 4397 4398 Collective on MPI_Comm 4399 4400 Input Parameters: 4401 + comm - the communicator the parallel matrix will live on 4402 . inmat - the input sequential matrix 4403 . n - number of local columns (or PETSC_DECIDE) 4404 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4405 4406 Output Parameter: 4407 . outmat - the parallel matrix generated 4408 4409 Level: advanced 4410 4411 Notes: The number of columns of the matrix on EACH process MUST be the same. 4412 4413 @*/ 4414 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4415 { 4416 PetscErrorCode ierr; 4417 PetscMPIInt size; 4418 4419 PetscFunctionBegin; 4420 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4421 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4422 if (size == 1) { 4423 if (scall == MAT_INITIAL_MATRIX) { 4424 ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr); 4425 } else { 4426 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4427 } 4428 } else { 4429 if (scall == MAT_INITIAL_MATRIX) { 4430 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4431 } 4432 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4433 } 4434 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4435 PetscFunctionReturn(0); 4436 }
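/*
   Example usage of MatCreateMPIAIJConcatenateSeqAIJ() (a minimal sketch, not
   part of the library source; it assumes every process holds a SeqAIJ matrix
   lmat, each with the same number of columns):

     Mat gmat;
     ierr = MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,lmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&gmat);CHKERRQ(ierr);
     (modify the values of lmat, keeping its nonzero pattern, then reuse the layout)
     ierr = MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,lmat,PETSC_DECIDE,MAT_REUSE_MATRIX,&gmat);CHKERRQ(ierr);
     ierr = MatDestroy(&gmat);CHKERRQ(ierr);
*/
4437 4438 #undef __FUNCT__ 4439 #define __FUNCT__ "MatFileSplit" 4440 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4441 { 4442 PetscErrorCode ierr; 4443 PetscMPIInt rank; 4444 PetscInt m,N,i,rstart,nnz; 4445 size_t len; 4446 const PetscInt *indx; 4447 PetscViewer out; 4448 char *name; 4449 Mat B; 4450 const PetscScalar *values; 4451 4452 PetscFunctionBegin; 4453 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4454 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4455 /* Should this be the type of the diagonal block of A?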
*/ 4456 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4457 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4458 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4459 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4460 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4461 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4462 for (i=0; i<m; i++) { 4463 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4464 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4465 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4466 } 4467 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4468 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4469 4470 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4471 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4472 ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr); 4473 sprintf(name,"%s.%d",outfile,rank); 4474 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4475 ierr = PetscFree(name);CHKERRQ(ierr); 4476 ierr = MatView(B,out);CHKERRQ(ierr); 4477 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4478 ierr = MatDestroy(&B);CHKERRQ(ierr); 4479 PetscFunctionReturn(0); 4480 } 4481 4482 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4483 #undef __FUNCT__ 4484 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4485 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4486 { 4487 PetscErrorCode ierr; 4488 Mat_Merge_SeqsToMPI *merge; 4489 PetscContainer container; 4490 4491 PetscFunctionBegin; 4492 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4493 if (container) { 4494 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4495 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4496 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4497 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4498 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4499 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4500 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4501 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4502 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4503 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4504 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4505 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4506 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4507 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4508 ierr = PetscFree(merge);CHKERRQ(ierr); 4509 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4510 } 4511 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4512 PetscFunctionReturn(0); 4513 } 4514 4515 #include <../src/mat/utils/freespace.h> 4516 #include <petscbt.h> 4517 4518 #undef __FUNCT__ 4519 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4520 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4521 { 4522 PetscErrorCode ierr; 4523 MPI_Comm comm; 4524 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4525 PetscMPIInt size,rank,taga,*len_s; 4526 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4527 PetscInt proc,m; 4528 PetscInt **buf_ri,**buf_rj; 4529 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4530 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4531 MPI_Request *s_waits,*r_waits; 4532 MPI_Status *status; 4533 MatScalar *aa=a->a; 4534 MatScalar **abuf_r,*ba_i; 4535 Mat_Merge_SeqsToMPI *merge; 4536 PetscContainer container; 4537 4538 PetscFunctionBegin; 4539 ierr = 
PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4540 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4541 4542 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4543 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4544 4545 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4546 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4547 4548 bi = merge->bi; 4549 bj = merge->bj; 4550 buf_ri = merge->buf_ri; 4551 buf_rj = merge->buf_rj; 4552 4553 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4554 owners = merge->rowmap->range; 4555 len_s = merge->len_s; 4556 4557 /* send and recv matrix values */ 4558 /*-----------------------------*/ 4559 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4560 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4561 4562 ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr); 4563 for (proc=0,k=0; proc<size; proc++) { 4564 if (!len_s[proc]) continue; 4565 i = owners[proc]; 4566 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4567 k++; 4568 } 4569 4570 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4571 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4572 ierr = PetscFree(status);CHKERRQ(ierr); 4573 4574 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4575 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4576 4577 /* insert mat values of mpimat */ 4578 /*----------------------------*/ 4579 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4580 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4581 4582 for (k=0; k<merge->nrecv; k++) { 4583 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 4584 nrows = *(buf_ri_k[k]); 4585 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th received i-structure */ 4586 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */ 4587 } 4588 4589 /* set values of ba */ 4590 m = merge->rowmap->n; 4591 for (i=0; i<m; i++) { 4592 arow = owners[rank] + i; 4593 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4594 bnzi = bi[i+1] - bi[i]; 4595 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4596 4597 /* add local non-zero vals of this proc's seqmat into ba */ 4598 anzi = ai[arow+1] - ai[arow]; 4599 aj = a->j + ai[arow]; 4600 aa = a->a + ai[arow]; 4601 nextaj = 0; 4602 for (j=0; nextaj<anzi; j++) { 4603 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4604 ba_i[j] += aa[nextaj++]; 4605 } 4606 } 4607 4608 /* add received vals into ba */ 4609 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4610 /* i-th row */ 4611 if (i == *nextrow[k]) { 4612 anzi = *(nextai[k]+1) - *nextai[k]; 4613 aj = buf_rj[k] + *(nextai[k]); 4614 aa = abuf_r[k] + *(nextai[k]); 4615 nextaj = 0; 4616 for (j=0; nextaj<anzi; j++) { 4617 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4618 ba_i[j] += aa[nextaj++]; 4619 } 4620 } 4621 nextrow[k]++; nextai[k]++; 4622 } 4623 } 4624 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4625 } 4626 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4627 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4628 4629 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4630 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4631 ierr =
PetscFree(ba_i);CHKERRQ(ierr); 4632 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4633 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4634 PetscFunctionReturn(0); 4635 } 4636 4637 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4638 4639 #undef __FUNCT__ 4640 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4641 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4642 { 4643 PetscErrorCode ierr; 4644 Mat B_mpi; 4645 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4646 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4647 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4648 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4649 PetscInt len,proc,*dnz,*onz,bs,cbs; 4650 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4651 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4652 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4653 MPI_Status *status; 4654 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4655 PetscBT lnkbt; 4656 Mat_Merge_SeqsToMPI *merge; 4657 PetscContainer container; 4658 4659 PetscFunctionBegin; 4660 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4661 4662 /* make sure it is a PETSc comm */ 4663 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4664 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4665 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4666 4667 ierr = PetscNew(&merge);CHKERRQ(ierr); 4668 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4669 4670 /* determine row ownership */ 4671 /*---------------------------------------------------------*/ 4672 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4673 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4674 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4675 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4676 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4677 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4678 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4679 4680 m = merge->rowmap->n; 4681 owners = merge->rowmap->range; 4682 4683 /* determine the number of messages to send, their lengths */ 4684 /*---------------------------------------------------------*/ 4685 len_s = merge->len_s; 4686 4687 len = 0; /* length of buf_si[] */ 4688 merge->nsend = 0; 4689 for (proc=0; proc<size; proc++) { 4690 len_si[proc] = 0; 4691 if (proc == rank) { 4692 len_s[proc] = 0; 4693 } else { 4694 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4695 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4696 } 4697 if (len_s[proc]) { 4698 merge->nsend++; 4699 nrows = 0; 4700 for (i=owners[proc]; i<owners[proc+1]; i++) { 4701 if (ai[i+1] > ai[i]) nrows++; 4702 } 4703 len_si[proc] = 2*(nrows+1); 4704 len += len_si[proc]; 4705 } 4706 } 4707 4708 /* determine the number and length of messages to receive for ij-structure */ 4709 /*-------------------------------------------------------------------------*/ 4710 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4711 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4712 4713 /* post the Irecv of j-structure */ 4714 /*-------------------------------*/ 4715 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4716 ierr = 
PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4717 4718 /* post the Isend of j-structure */ 4719 /*--------------------------------*/ 4720 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4721 4722 for (proc=0, k=0; proc<size; proc++) { 4723 if (!len_s[proc]) continue; 4724 i = owners[proc]; 4725 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4726 k++; 4727 } 4728 4729 /* receives and sends of j-structure are complete */ 4730 /*------------------------------------------------*/ 4731 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4732 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4733 4734 /* send and recv i-structure */ 4735 /*---------------------------*/ 4736 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4737 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4738 4739 ierr = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr); 4740 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4741 for (proc=0,k=0; proc<size; proc++) { 4742 if (!len_s[proc]) continue; 4743 /* form outgoing message for i-structure: 4744 buf_si[0]: nrows to be sent 4745 [1:nrows]: row index (global) 4746 [nrows+1:2*nrows+1]: i-structure index 4747 */ 4748 /*-------------------------------------------*/ 4749 nrows = len_si[proc]/2 - 1; 4750 buf_si_i = buf_si + nrows+1; 4751 buf_si[0] = nrows; 4752 buf_si_i[0] = 0; 4753 nrows = 0; 4754 for (i=owners[proc]; i<owners[proc+1]; i++) { 4755 anzi = ai[i+1] - ai[i]; 4756 if (anzi) { 4757 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4758 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4759 nrows++; 4760 } 4761 } 4762 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4763 k++; 4764 buf_si += len_si[proc]; 4765 } 4766 4767 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4768 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4769 4770 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4771 for (i=0; i<merge->nrecv; i++) { 4772 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4773 } 4774 4775 ierr = PetscFree(len_si);CHKERRQ(ierr); 4776 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4777 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4778 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4779 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4780 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4781 ierr = PetscFree(status);CHKERRQ(ierr); 4782 4783 /* compute a local seq matrix in each processor */ 4784 /*----------------------------------------------*/ 4785 /* allocate bi array and free space for accumulating nonzero column info */ 4786 ierr = PetscMalloc1((m+1),&bi);CHKERRQ(ierr); 4787 bi[0] = 0; 4788 4789 /* create and initialize a linked list */ 4790 nlnk = N+1; 4791 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4792 4793 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4794 len = ai[owners[rank+1]] - ai[owners[rank]]; 4795 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4796 4797 current_space = free_space; 4798 4799 /* determine symbolic info for each local row */ 4800 ierr = 
PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4801 4802 for (k=0; k<merge->nrecv; k++) { 4803 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 4804 nrows = *buf_ri_k[k]; 4805 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */ 4806 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */ 4807 } 4808 4809 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4810 len = 0; 4811 for (i=0; i<m; i++) { 4812 bnzi = 0; 4813 /* add local non-zero cols of this proc's seqmat into lnk */ 4814 arow = owners[rank] + i; 4815 anzi = ai[arow+1] - ai[arow]; 4816 aj = a->j + ai[arow]; 4817 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4818 bnzi += nlnk; 4819 /* add received col data into lnk */ 4820 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4821 if (i == *nextrow[k]) { /* i-th row */ 4822 anzi = *(nextai[k]+1) - *nextai[k]; 4823 aj = buf_rj[k] + *nextai[k]; 4824 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4825 bnzi += nlnk; 4826 nextrow[k]++; nextai[k]++; 4827 } 4828 } 4829 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4830 4831 /* if free space is not available, make more free space */ 4832 if (current_space->local_remaining<bnzi) { 4833 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr); 4834 nspacedouble++; 4835 } 4836 /* copy data into free space, then initialize lnk */ 4837 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4838 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4839 4840 current_space->array += bnzi; 4841 current_space->local_used += bnzi; 4842 current_space->local_remaining -= bnzi; 4843 4844 bi[i+1] = bi[i] + bnzi; 4845 } 4846 4847 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4848 4849 ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr); 4850 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4851 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4852 4853 /* create symbolic parallel matrix B_mpi */ 4854 /*---------------------------------------*/ 4855 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4856 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4857 if (n==PETSC_DECIDE) { 4858 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4859 } else { 4860 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4861 } 4862 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4863 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4864 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4865 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4866 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4867 4868 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4869 B_mpi->assembled = PETSC_FALSE; 4870 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4871 merge->bi = bi; 4872 merge->bj = bj; 4873 merge->buf_ri = buf_ri; 4874 merge->buf_rj = buf_rj; 4875 merge->coi = NULL; 4876 merge->coj = NULL; 4877 merge->owners_co = NULL; 4878 4879 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4880 4881 /* attach the supporting struct to B_mpi for reuse */ 4882 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4883 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4884 ierr =
PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4885 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4886 *mpimat = B_mpi; 4887 4888 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4889 PetscFunctionReturn(0); 4890 } 4891 4892 #undef __FUNCT__ 4893 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4894 /*@C 4895 MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding sequential 4896 matrices from each processor 4897 4898 Collective on MPI_Comm 4899 4900 Input Parameters: 4901 + comm - the communicator the parallel matrix will live on 4902 . seqmat - the input sequential matrix 4903 . m - number of local rows (or PETSC_DECIDE) 4904 . n - number of local columns (or PETSC_DECIDE) 4905 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4906 4907 Output Parameter: 4908 . mpimat - the parallel matrix generated 4909 4910 Level: advanced 4911 4912 Notes: 4913 The dimensions of the sequential matrix on each process MUST be the same. 4914 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 4915 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4916 @*/ 4917 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4918 { 4919 PetscErrorCode ierr; 4920 PetscMPIInt size; 4921 4922 PetscFunctionBegin; 4923 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4924 if (size == 1) { 4925 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4926 if (scall == MAT_INITIAL_MATRIX) { 4927 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4928 } else { 4929 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4930 } 4931 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4932 PetscFunctionReturn(0); 4933 } 4934 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4935 if (scall == MAT_INITIAL_MATRIX) { 4936 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4937 } 4938 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4939 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4940 PetscFunctionReturn(0); 4941 }
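/*
   Example usage of MatCreateMPIAIJSumSeqAIJ() (a minimal sketch, not part of
   the library source; it assumes every process holds a SeqAIJ matrix seqmat of
   identical global dimensions whose entries are to be summed across the
   communicator):

     Mat mpimat;
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
     (change the numerical values of seqmat, keeping its nonzero pattern)
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
     ierr = MatDestroy(&mpimat);CHKERRQ(ierr);
*/
4942 4943 #undef __FUNCT__ 4944 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4945 /*@ 4946 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4947 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4948 with MatGetSize() 4949 4950 Not Collective 4951 4952 Input Parameters: 4953 + A - the matrix 4954 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4955 4956 Output Parameter: 4957 .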
A_loc - the local sequential matrix generated 4958 4959 Level: developer 4960 4961 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 4962 4963 @*/ 4964 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4965 { 4966 PetscErrorCode ierr; 4967 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4968 Mat_SeqAIJ *mat,*a,*b; 4969 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4970 MatScalar *aa,*ba,*cam; 4971 PetscScalar *ca; 4972 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4973 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4974 PetscBool match; 4975 4976 PetscFunctionBegin; 4977 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4978 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4979 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4980 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4981 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4982 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4983 aa = a->a; ba = b->a; 4984 if (scall == MAT_INITIAL_MATRIX) { 4985 ierr = PetscMalloc1((1+am),&ci);CHKERRQ(ierr); 4986 ci[0] = 0; 4987 for (i=0; i<am; i++) { 4988 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4989 } 4990 ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr); 4991 ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr); 4992 k = 0; 4993 for (i=0; i<am; i++) { 4994 ncols_o = bi[i+1] - bi[i]; 4995 ncols_d = ai[i+1] - ai[i]; 4996 /* off-diagonal portion of A */ 4997 for (jo=0; jo<ncols_o; jo++) { 4998 col = cmap[*bj]; 4999 if (col >= cstart) break; 5000 cj[k] = col; bj++; 5001 ca[k++] = *ba++; 5002 } 5003 /* diagonal portion of A */ 5004 for (j=0; j<ncols_d; j++) { 5005 cj[k] = cstart + *aj++; 5006 ca[k++] = *aa++; 5007 } 5008 /* off-diagonal portion of A */ 5009 for (j=jo; j<ncols_o; j++) { 5010 cj[k] = cmap[*bj++]; 5011 ca[k++] = *ba++; 5012 } 5013 } 5014 /* put together the new matrix */ 5015 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5016 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5017 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5018 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5019 mat->free_a = PETSC_TRUE; 5020 mat->free_ij = PETSC_TRUE; 5021 mat->nonew = 0; 5022 } else if (scall == MAT_REUSE_MATRIX) { 5023 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5024 ci = mat->i; cj = mat->j; cam = mat->a; 5025 for (i=0; i<am; i++) { 5026 /* off-diagonal portion of A */ 5027 ncols_o = bi[i+1] - bi[i]; 5028 for (jo=0; jo<ncols_o; jo++) { 5029 col = cmap[*bj]; 5030 if (col >= cstart) break; 5031 *cam++ = *ba++; bj++; 5032 } 5033 /* diagonal portion of A */ 5034 ncols_d = ai[i+1] - ai[i]; 5035 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5036 /* off-diagonal portion of A */ 5037 for (j=jo; j<ncols_o; j++) { 5038 *cam++ = *ba++; bj++; 5039 } 5040 } 5041 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5042 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5043 PetscFunctionReturn(0); 5044 }
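/*
   Example usage of MatMPIAIJGetLocalMat() (a minimal sketch, not part of the
   library source; it assumes A is an assembled MATMPIAIJ matrix). A variant
   that keeps only the nonzero columns is MatMPIAIJGetLocalMatCondensed(),
   defined next:

     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     (work with the local rows of A as a SeqAIJ matrix; after the values of A
      change, refresh the copy with MAT_REUSE_MATRIX)
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/
5045 5046 #undef __FUNCT__ 5047 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5048 /*@C 5049 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5050 5051 Not Collective 5052 5053 Input Parameters: 5054 + A - the matrix 5055 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5056 - row, col - index sets of rows and columns to extract (or NULL) 5057 5058 Output Parameter: 5059 .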
A_loc - the local sequential matrix generated 5060 5061 Level: developer 5062 5063 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5064 5065 @*/ 5066 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5067 { 5068 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5069 PetscErrorCode ierr; 5070 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5071 IS isrowa,iscola; 5072 Mat *aloc; 5073 PetscBool match; 5074 5075 PetscFunctionBegin; 5076 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5077 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5078 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5079 if (!row) { 5080 start = A->rmap->rstart; end = A->rmap->rend; 5081 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5082 } else { 5083 isrowa = *row; 5084 } 5085 if (!col) { 5086 start = A->cmap->rstart; 5087 cmap = a->garray; 5088 nzA = a->A->cmap->n; 5089 nzB = a->B->cmap->n; 5090 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5091 ncols = 0; 5092 for (i=0; i<nzB; i++) { 5093 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5094 else break; 5095 } 5096 imark = i; 5097 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5098 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5099 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5100 } else { 5101 iscola = *col; 5102 } 5103 if (scall != MAT_INITIAL_MATRIX) { 5104 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5105 aloc[0] = *A_loc; 5106 } 5107 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5108 *A_loc = aloc[0]; 5109 ierr = PetscFree(aloc);CHKERRQ(ierr); 5110 if (!row) { 5111 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5112 } 5113 if (!col) { 5114 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5115 } 5116 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5117 PetscFunctionReturn(0); 5118 } 5119 5120 #undef __FUNCT__ 5121 #define __FUNCT__ "MatGetBrowsOfAcols" 5122 /*@C 5123 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A 5124 5125 Collective on Mat 5126 5127 Input Parameters: 5128 + A,B - the matrices in mpiaij format 5129 .
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5130 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5131 5132 Output Parameter: 5133 + rowb, colb - index sets of rows and columns of B to extract 5134 - B_seq - the sequential matrix generated 5135 5136 Level: developer 5137 5138 @*/ 5139 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5140 { 5141 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5142 PetscErrorCode ierr; 5143 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5144 IS isrowb,iscolb; 5145 Mat *bseq=NULL; 5146 5147 PetscFunctionBegin; 5148 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5149 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5150 } 5151 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5152 5153 if (scall == MAT_INITIAL_MATRIX) { 5154 start = A->cmap->rstart; 5155 cmap = a->garray; 5156 nzA = a->A->cmap->n; 5157 nzB = a->B->cmap->n; 5158 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5159 ncols = 0; 5160 for (i=0; i<nzB; i++) { /* row < local row index */ 5161 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5162 else break; 5163 } 5164 imark = i; 5165 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5166 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5167 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5168 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5169 } else { 5170 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5171 isrowb = *rowb; iscolb = *colb; 5172 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5173 bseq[0] = *B_seq; 5174 } 5175 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5176 *B_seq = bseq[0]; 5177 ierr = PetscFree(bseq);CHKERRQ(ierr); 5178 if (!rowb) { 5179 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5180 } else { 5181 *rowb = isrowb; 5182 } 5183 if (!colb) { 5184 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5185 } else { 5186 *colb = iscolb; 5187 } 5188 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5189 PetscFunctionReturn(0); 5190 } 5191 5192 #undef __FUNCT__ 5193 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5194 /* 5195 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns 5196 of the OFF-DIAGONAL portion of the local part of A 5197 5198 Collective on Mat 5199 5200 Input Parameters: 5201 + A,B - the matrices in mpiaij format 5202 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5203 5204 Output Parameter: 5205 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5206 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5207 .
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5208 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5209 5210 Level: developer 5211 5212 */ 5213 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5214 { 5215 VecScatter_MPI_General *gen_to,*gen_from; 5216 PetscErrorCode ierr; 5217 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5218 Mat_SeqAIJ *b_oth; 5219 VecScatter ctx =a->Mvctx; 5220 MPI_Comm comm; 5221 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5222 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5223 PetscScalar *rvalues,*svalues; 5224 MatScalar *b_otha,*bufa,*bufA; 5225 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5226 MPI_Request *rwaits = NULL,*swaits = NULL; 5227 MPI_Status *sstatus,rstatus; 5228 PetscMPIInt jj; 5229 PetscInt *cols,sbs,rbs; 5230 PetscScalar *vals; 5231 5232 PetscFunctionBegin; 5233 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5234 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5235 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5236 } 5237 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5238 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5239 5240 gen_to = (VecScatter_MPI_General*)ctx->todata; 5241 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5242 rvalues = gen_from->values; /* holds the length of receiving row */ 5243 svalues = gen_to->values; /* holds the length of sending row */ 5244 nrecvs = gen_from->n; 5245 nsends = gen_to->n; 5246 5247 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5248 srow = gen_to->indices; /* local row index to be sent */ 5249 sstarts = gen_to->starts; 5250 sprocs = gen_to->procs; 5251 sstatus = gen_to->sstatus; 5252 sbs = gen_to->bs; 5253 rstarts = gen_from->starts; 5254 rprocs = gen_from->procs; 5255 rbs = gen_from->bs; 5256 5257 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5258 if (scall == MAT_INITIAL_MATRIX) { 5259 /* i-array */ 5260 /*---------*/ 5261 /* post receives */ 5262 for (i=0; i<nrecvs; i++) { 5263 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5264 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5265 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5266 } 5267 5268 /* pack the outgoing message */ 5269 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5270 5271 sstartsj[0] = 0; 5272 rstartsj[0] = 0; 5273 len = 0; /* total length of j or a array to be sent */ 5274 k = 0; 5275 for (i=0; i<nsends; i++) { 5276 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5277 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5278 for (j=0; j<nrows; j++) { 5279 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5280 for (l=0; l<sbs; l++) { 5281 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5282 5283 rowlen[j*sbs+l] = ncols; 5284 5285 len += ncols; 5286 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5287 } 5288 k++; 5289 } 5290 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5291 5292 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5293 } 5294 /* recvs and 
sends of i-array are completed */ 5295 i = nrecvs; 5296 while (i--) { 5297 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5298 } 5299 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5300 5301 /* allocate buffers for sending j and a arrays */ 5302 ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr); 5303 ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr); 5304 5305 /* create i-array of B_oth */ 5306 ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr); 5307 5308 b_othi[0] = 0; 5309 len = 0; /* total length of j or a array to be received */ 5310 k = 0; 5311 for (i=0; i<nrecvs; i++) { 5312 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5313 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5314 for (j=0; j<nrows; j++) { 5315 b_othi[k+1] = b_othi[k] + rowlen[j]; 5316 len += rowlen[j]; k++; 5317 } 5318 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5319 } 5320 5321 /* allocate space for j and a arrays of B_oth */ 5322 ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr); 5323 ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr); 5324 5325 /* j-array */ 5326 /*---------*/ 5327 /* post receives of j-array */ 5328 for (i=0; i<nrecvs; i++) { 5329 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5330 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5331 } 5332 5333 /* pack the outgoing message j-array */ 5334 k = 0; 5335 for (i=0; i<nsends; i++) { 5336 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5337 bufJ = bufj+sstartsj[i]; 5338 for (j=0; j<nrows; j++) { 5339 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5340 for (ll=0; ll<sbs; ll++) { 5341 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5342 for (l=0; l<ncols; l++) { 5343 *bufJ++ = cols[l]; 5344 } 5345 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5346 } 5347 } 5348 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5349 } 5350 5351 /* recvs and sends of j-array are completed */ 5352 i = nrecvs; 5353 while (i--) { 5354 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5355 } 5356 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5357 } else if (scall == MAT_REUSE_MATRIX) { 5358 sstartsj = *startsj_s; 5359 rstartsj = *startsj_r; 5360 bufa = *bufa_ptr; 5361 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5362 b_otha = b_oth->a; 5363 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5364 5365 /* a-array */ 5366 /*---------*/ 5367 /* post receives of a-array */ 5368 for (i=0; i<nrecvs; i++) { 5369 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5370 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5371 } 5372 5373 /* pack the outgoing message a-array */ 5374 k = 0; 5375 for (i=0; i<nsends; i++) { 5376 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5377 bufA = bufa+sstartsj[i]; 5378 for (j=0; j<nrows; j++) { 5379 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5380 for (ll=0; ll<sbs; ll++) { 5381 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5382 for (l=0; l<ncols; l++) { 5383 *bufA++ = vals[l]; 5384 } 5385 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5386 } 5387 } 5388 ierr =
MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5389 } 5390 /* recvs and sends of a-array are completed */ 5391 i = nrecvs; 5392 while (i--) { 5393 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5394 } 5395 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5396 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5397 5398 if (scall == MAT_INITIAL_MATRIX) { 5399 /* put together the new matrix */ 5400 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5401 5402 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5403 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5404 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5405 b_oth->free_a = PETSC_TRUE; 5406 b_oth->free_ij = PETSC_TRUE; 5407 b_oth->nonew = 0; 5408 5409 ierr = PetscFree(bufj);CHKERRQ(ierr); 5410 if (!startsj_s || !bufa_ptr) { 5411 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5412 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5413 } else { 5414 *startsj_s = sstartsj; 5415 *startsj_r = rstartsj; 5416 *bufa_ptr = bufa; 5417 } 5418 } 5419 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5420 PetscFunctionReturn(0); 5421 } 5422 5423 #undef __FUNCT__ 5424 #define __FUNCT__ "MatGetCommunicationStructs" 5425 /*@C 5426 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5427 5428 Not Collective 5429 5430 Input Parameters: 5431 . A - The matrix in mpiaij format 5432 5433 Output Parameter: 5434 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5435 . colmap - A map from global column index to local index into lvec 5436 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5437 5438 Level: developer 5439 5440 @*/ 5441 #if defined(PETSC_USE_CTABLE) 5442 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5443 #else 5444 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5445 #endif 5446 { 5447 Mat_MPIAIJ *a; 5448 5449 PetscFunctionBegin; 5450 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5451 PetscValidPointer(lvec, 2); 5452 PetscValidPointer(colmap, 3); 5453 PetscValidPointer(multScatter, 4); 5454 a = (Mat_MPIAIJ*) A->data; 5455 if (lvec) *lvec = a->lvec; 5456 if (colmap) *colmap = a->colmap; 5457 if (multScatter) *multScatter = a->Mvctx; 5458 PetscFunctionReturn(0); 5459 } 5460 5461 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5462 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5463 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5464 5465 #undef __FUNCT__ 5466 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5467 /* 5468 Computes (B'*A')' since computing B*A directly is untenable 5469 5470 n p p 5471 ( ) ( ) ( ) 5472 m ( A ) * n ( B ) = m ( C ) 5473 ( ) ( ) ( ) 5474 5475 */ 5476 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5477 { 5478 PetscErrorCode ierr; 5479 Mat At,Bt,Ct; 5480 5481 PetscFunctionBegin; 5482 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5483 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5484 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5485 ierr = MatDestroy(&At);CHKERRQ(ierr); 
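/* the temporary transposes At and Bt are no longer needed once Ct = Bt*At has been formed; Ct is transposed back into C below */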
5486 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5487 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5488 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5489 PetscFunctionReturn(0); 5490 } 5491 5492 #undef __FUNCT__ 5493 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5494 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5495 { 5496 PetscErrorCode ierr; 5497 PetscInt m=A->rmap->n,n=B->cmap->n; 5498 Mat Cmat; 5499 5500 PetscFunctionBegin; 5501 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5502 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5503 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5504 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5505 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5506 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5507 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5508 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5509 5510 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5511 5512 *C = Cmat; 5513 PetscFunctionReturn(0); 5514 } 5515 5516 /* ----------------------------------------------------------------*/ 5517 #undef __FUNCT__ 5518 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5519 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5520 { 5521 PetscErrorCode ierr; 5522 5523 PetscFunctionBegin; 5524 if (scall == MAT_INITIAL_MATRIX) { 5525 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5526 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5527 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5528 } 5529 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5530 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5531 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5532 PetscFunctionReturn(0); 5533 } 5534 5535 #if defined(PETSC_HAVE_MUMPS) 5536 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5537 #endif 5538 #if defined(PETSC_HAVE_PASTIX) 5539 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5540 #endif 5541 #if defined(PETSC_HAVE_SUPERLU_DIST) 5542 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5543 #endif 5544 #if defined(PETSC_HAVE_CLIQUE) 5545 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5546 #endif 5547 5548 /*MC 5549 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5550 5551 Options Database Keys: 5552 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5553 5554 Level: beginner 5555 5556 .seealso: MatCreateAIJ() 5557 M*/ 5558 5559 #undef __FUNCT__ 5560 #define __FUNCT__ "MatCreate_MPIAIJ" 5561 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5562 { 5563 Mat_MPIAIJ *b; 5564 PetscErrorCode ierr; 5565 PetscMPIInt size; 5566 5567 PetscFunctionBegin; 5568 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5569 5570 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5571 B->data = (void*)b; 5572 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5573 B->assembled = PETSC_FALSE; 5574 B->insertmode = NOT_SET_VALUES; 5575 b->size = size; 5576 5577 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5578 5579 /* build cache (stash) for off-process entries generated during assembly */ 5580 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5581 5582 b->donotstash = PETSC_FALSE; 5583 b->colmap = 0; 5584 b->garray = 0; 5585 b->roworiented = PETSC_TRUE; 5586 5587 /* stuff used for matrix vector multiply */ 5588 b->lvec = NULL; 5589 b->Mvctx = NULL; 5590 5591 /* stuff for MatGetRow() */ 5592 b->rowindices = 0; 5593 b->rowvalues = 0; 5594 b->getrowactive = PETSC_FALSE; 5595 5596 /* flexible pointer used in CUSP/CUSPARSE classes */ 5597 b->spptr = NULL; 5598 5599 #if defined(PETSC_HAVE_MUMPS) 5600 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5601 #endif 5602 #if defined(PETSC_HAVE_PASTIX) 5603 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5604 #endif 5605 #if defined(PETSC_HAVE_SUPERLU_DIST) 5606 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5607 #endif 5608 #if defined(PETSC_HAVE_CLIQUE) 5609 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr); 5610 #endif 5611 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5612 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5613 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5614 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5615 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5616 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5617 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5618 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5619 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5620 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5621 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5622 ierr =
PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5623 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5624 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5625 PetscFunctionReturn(0); 5626 } 5627 5628 #undef __FUNCT__ 5629 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5630 /*@ 5631 MatCreateMPIAIJWithSplitArrays - creates an MPIAIJ matrix using arrays that contain the "diagonal" 5632 and "off-diagonal" part of the matrix in CSR format. 5633 5634 Collective on MPI_Comm 5635 5636 Input Parameters: 5637 + comm - MPI communicator 5638 . m - number of local rows (Cannot be PETSC_DECIDE) 5639 . n - number of local columns; this value should be the same as the local size used in creating the 5640 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it 5641 calculated if N is given). For square matrices n is almost always m. 5642 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5643 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5644 . i - row indices for "diagonal" portion of matrix 5645 . j - column indices 5646 . a - matrix values 5647 . oi - row indices for "off-diagonal" portion of matrix 5648 . oj - column indices 5649 - oa - matrix values 5650 5651 Output Parameter: 5652 . mat - the matrix 5653 5654 Level: advanced 5655 5656 Notes: 5657 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5658 must free the arrays once the matrix has been destroyed and not before. 5659 5660 The i and j indices are 0 based. 5661 5662 See MatCreateAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix 5663 5664 This sets local rows and cannot be used to set off-processor values. 5665 5666 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5667 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5668 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5669 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5670 keep track of the underlying arrays. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5671 communication if it is known that only local entries will be set.
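Example (a minimal sketch, not from the original manual page; it assumes two MPI processes, each
owning one row with a single diagonal entry and an empty off-diagonal block, so the assembled
global matrix is the 2x2 identity):

.vb
   PetscInt    i[]  = {0,1}, j[]  = {0}, oi[] = {0,0}, oj[] = {0};
   PetscScalar a[]  = {1.0}, oa[] = {0.0};
   Mat         mat;
   ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);
   (the arrays must remain valid until mat has been destroyed)
   ierr = MatDestroy(&mat);CHKERRQ(ierr);
.ve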
5672 5673 .keywords: matrix, aij, compressed row, sparse, parallel 5674 5675 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5676 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5677 @*/ 5678 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5679 { 5680 PetscErrorCode ierr; 5681 Mat_MPIAIJ *maij; 5682 5683 PetscFunctionBegin; 5684 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5685 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5686 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5687 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5688 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5689 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5690 maij = (Mat_MPIAIJ*) (*mat)->data; 5691 5692 (*mat)->preallocated = PETSC_TRUE; 5693 5694 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5695 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5696 5697 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5698 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5699 5700 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5701 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5702 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5703 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5704 5705 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5706 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5707 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5708 PetscFunctionReturn(0); 5709 } 5710 5711 /* 5712 Special version for direct calls from Fortran 5713 */ 5714 #include <petsc-private/fortranimpl.h> 5715 5716 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5717 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5718 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5719 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5720 #endif 5721 5722 /* Change these macros so they can be used in a void function */ 5723 #undef CHKERRQ 5724 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5725 #undef SETERRQ2 5726 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5727 #undef SETERRQ3 5728 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5729 #undef SETERRQ 5730 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5731 5732 #undef __FUNCT__ 5733 #define __FUNCT__ "matsetvaluesmpiaij_" 5734 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5735 { 5736 Mat mat = *mmat; 5737 PetscInt m = *mm, n = *mn; 5738 InsertMode addv = *maddv; 5739 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5740 PetscScalar value; 5741 PetscErrorCode ierr; 5742 5743 MatCheckPreallocated(mat,1); 5744 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5745 5746 #if defined(PETSC_USE_DEBUG) 5747 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5748 #endif 5749 { 5750 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5751
PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5752 PetscBool roworiented = aij->roworiented; 5753 5754 /* Some Variables required in the macro */ 5755 Mat A = aij->A; 5756 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5757 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5758 MatScalar *aa = a->a; 5759 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5760 Mat B = aij->B; 5761 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5762 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5763 MatScalar *ba = b->a; 5764 5765 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5766 PetscInt nonew = a->nonew; 5767 MatScalar *ap1,*ap2; 5768 5769 PetscFunctionBegin; 5770 for (i=0; i<m; i++) { 5771 if (im[i] < 0) continue; 5772 #if defined(PETSC_USE_DEBUG) 5773 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5774 #endif 5775 if (im[i] >= rstart && im[i] < rend) { 5776 row = im[i] - rstart; 5777 lastcol1 = -1; 5778 rp1 = aj + ai[row]; 5779 ap1 = aa + ai[row]; 5780 rmax1 = aimax[row]; 5781 nrow1 = ailen[row]; 5782 low1 = 0; 5783 high1 = nrow1; 5784 lastcol2 = -1; 5785 rp2 = bj + bi[row]; 5786 ap2 = ba + bi[row]; 5787 rmax2 = bimax[row]; 5788 nrow2 = bilen[row]; 5789 low2 = 0; 5790 high2 = nrow2; 5791 5792 for (j=0; j<n; j++) { 5793 if (roworiented) value = v[i*n+j]; 5794 else value = v[i+j*m]; 5795 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5796 if (in[j] >= cstart && in[j] < cend) { 5797 col = in[j] - cstart; 5798 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5799 } else if (in[j] < 0) continue; 5800 #if defined(PETSC_USE_DEBUG) 5801 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5802 #endif 5803 else { 5804 if (mat->was_assembled) { 5805 if (!aij->colmap) { 5806 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5807 } 5808 #if defined(PETSC_USE_CTABLE) 5809 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5810 col--; 5811 #else 5812 col = aij->colmap[in[j]] - 1; 5813 #endif 5814 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5815 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5816 col = in[j]; 5817 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5818 B = aij->B; 5819 b = (Mat_SeqAIJ*)B->data; 5820 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5821 rp2 = bj + bi[row]; 5822 ap2 = ba + bi[row]; 5823 rmax2 = bimax[row]; 5824 nrow2 = bilen[row]; 5825 low2 = 0; 5826 high2 = nrow2; 5827 bm = aij->B->rmap->n; 5828 ba = b->a; 5829 } 5830 } else col = in[j]; 5831 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5832 } 5833 } 5834 } else if (!aij->donotstash) { 5835 if (roworiented) { 5836 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5837 } else { 5838 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5839 } 5840 } 5841 } 5842 } 5843 PetscFunctionReturnVoid(); 5844 } 5845 5846