#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc-private/vecimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also
   automatically switches over to use inodes when enough of them exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalSet_MPIAIJ"
PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
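  /* Note (added for clarity): the branch below is a purely local fast path.  When the matrix is
     assembled and the row and column ownership ranges coincide (the usual conformally partitioned
     square case), every diagonal entry owned by this process lies in the sequential diagonal
     block aij->A, so MatDiagonalSet() can be applied to aij->A directly without communication;
     otherwise the generic MatDiagonalSet_Default() path is used. */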
Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 113 114 PetscFunctionBegin; 115 if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) { 116 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 117 } else { 118 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 119 } 120 PetscFunctionReturn(0); 121 } 122 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ" 126 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 127 { 128 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 129 PetscErrorCode ierr; 130 PetscInt i,rstart,nrows,*rows; 131 132 PetscFunctionBegin; 133 *zrows = NULL; 134 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 135 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 136 for (i=0; i<nrows; i++) rows[i] += rstart; 137 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 138 PetscFunctionReturn(0); 139 } 140 141 #undef __FUNCT__ 142 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 143 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 144 { 145 PetscErrorCode ierr; 146 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 147 PetscInt i,n,*garray = aij->garray; 148 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 149 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 150 PetscReal *work; 151 152 PetscFunctionBegin; 153 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 154 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 155 if (type == NORM_2) { 156 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 157 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 158 } 159 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 160 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 161 } 162 } else if (type == NORM_1) { 163 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 164 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 165 } 166 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 167 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 168 } 169 } else if (type == NORM_INFINITY) { 170 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 171 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 172 } 173 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 174 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 175 } 176 177 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 178 if (type == NORM_INFINITY) { 179 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 180 } else { 181 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 182 } 183 ierr = PetscFree(work);CHKERRQ(ierr); 184 if (type == NORM_2) { 185 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 186 } 187 PetscFunctionReturn(0); 188 } 189 190 #undef __FUNCT__ 191 #define __FUNCT__ "MatDistribute_MPIAIJ" 192 /* 193 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 194 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 
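   A minimal calling sketch (illustrative only; the variable names are made up, gmat is assumed
   to be a MATSEQAIJ matrix that is valid on rank 0, and m is the number of rows this rank
   should own in the distributed result):

      ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&pmat);CHKERRQ(ierr);
      ... later, after the numerical values of gmat have changed on rank 0 ...
      ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&pmat);CHKERRQ(ierr);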
195 196 Only for square matrices 197 198 Used by a preconditioner, hence PETSC_EXTERN 199 */ 200 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 201 { 202 PetscMPIInt rank,size; 203 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 204 PetscErrorCode ierr; 205 Mat mat; 206 Mat_SeqAIJ *gmata; 207 PetscMPIInt tag; 208 MPI_Status status; 209 PetscBool aij; 210 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 211 212 PetscFunctionBegin; 213 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 214 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 215 if (!rank) { 216 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 217 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 218 } 219 if (reuse == MAT_INITIAL_MATRIX) { 220 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 221 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 222 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 223 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 224 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 225 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 226 ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 227 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 228 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 229 230 rowners[0] = 0; 231 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 232 rstart = rowners[rank]; 233 rend = rowners[rank+1]; 234 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 235 if (!rank) { 236 gmata = (Mat_SeqAIJ*) gmat->data; 237 /* send row lengths to all processors */ 238 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 239 for (i=1; i<size; i++) { 240 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 241 } 242 /* determine number diagonal and off-diagonal counts */ 243 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 244 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 245 jj = 0; 246 for (i=0; i<m; i++) { 247 for (j=0; j<dlens[i]; j++) { 248 if (gmata->j[jj] < rstart) ld[i]++; 249 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 250 jj++; 251 } 252 } 253 /* send column indices to other processes */ 254 for (i=1; i<size; i++) { 255 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 256 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 257 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 258 } 259 260 /* send numerical values to other processes */ 261 for (i=1; i<size; i++) { 262 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 263 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 264 } 265 gmataa = gmata->a; 266 gmataj = gmata->j; 267 268 } else { 269 /* receive row lengths */ 270 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 271 /* receive column indices */ 272 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 273 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 274 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 275 /* determine number diagonal and off-diagonal counts */ 276 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 277 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 278 jj = 0; 279 for (i=0; i<m; i++) { 280 for (j=0; j<dlens[i]; j++) { 281 if 
(gmataj[jj] < rstart) ld[i]++; 282 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 283 jj++; 284 } 285 } 286 /* receive numerical values */ 287 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 288 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 289 } 290 /* set preallocation */ 291 for (i=0; i<m; i++) { 292 dlens[i] -= olens[i]; 293 } 294 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 295 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 296 297 for (i=0; i<m; i++) { 298 dlens[i] += olens[i]; 299 } 300 cnt = 0; 301 for (i=0; i<m; i++) { 302 row = rstart + i; 303 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 304 cnt += dlens[i]; 305 } 306 if (rank) { 307 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 308 } 309 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 310 ierr = PetscFree(rowners);CHKERRQ(ierr); 311 312 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 313 314 *inmat = mat; 315 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 316 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 317 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 318 mat = *inmat; 319 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 320 if (!rank) { 321 /* send numerical values to other processes */ 322 gmata = (Mat_SeqAIJ*) gmat->data; 323 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 324 gmataa = gmata->a; 325 for (i=1; i<size; i++) { 326 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 327 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 328 } 329 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 330 } else { 331 /* receive numerical values from process 0*/ 332 nz = Ad->nz + Ao->nz; 333 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 334 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 335 } 336 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 337 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 338 ad = Ad->a; 339 ao = Ao->a; 340 if (mat->rmap->n) { 341 i = 0; 342 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 343 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 344 } 345 for (i=1; i<mat->rmap->n; i++) { 346 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 347 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 348 } 349 i--; 350 if (mat->rmap->n) { 351 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 352 } 353 if (rank) { 354 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 355 } 356 } 357 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 358 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 359 PetscFunctionReturn(0); 360 } 361 362 /* 363 Local utility routine that creates a mapping from the global column 364 number to the local number in the off-diagonal part of the local 365 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 366 a slightly higher hash table cost; without it it is not scalable (each processor 367 has an order N integer array but is fast to acess. 
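   For illustration (made-up numbers): if the off-diagonal block B on this process references
   the global columns garray = {3,7,12}, then MatCreateColmap_MPIAIJ_Private() below records
       colmap[3] = 1,  colmap[7] = 2,  colmap[12] = 3    (all other entries stay 0)
   so a global column g is translated to the local column colmap[g]-1; a stored value of 0
   (giving -1 after the subtraction) means g does not occur in B.  When PETSC_USE_CTABLE is
   defined the same information is kept in a hash table and queried with
   PetscTableFind(colmap,g+1,&col) followed by col--.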
368 */ 369 #undef __FUNCT__ 370 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 371 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 372 { 373 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 374 PetscErrorCode ierr; 375 PetscInt n = aij->B->cmap->n,i; 376 377 PetscFunctionBegin; 378 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 379 #if defined(PETSC_USE_CTABLE) 380 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 381 for (i=0; i<n; i++) { 382 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 383 } 384 #else 385 ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr); 386 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 387 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 388 #endif 389 PetscFunctionReturn(0); 390 } 391 392 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 393 { \ 394 if (col <= lastcol1) low1 = 0; \ 395 else high1 = nrow1; \ 396 lastcol1 = col;\ 397 while (high1-low1 > 5) { \ 398 t = (low1+high1)/2; \ 399 if (rp1[t] > col) high1 = t; \ 400 else low1 = t; \ 401 } \ 402 for (_i=low1; _i<high1; _i++) { \ 403 if (rp1[_i] > col) break; \ 404 if (rp1[_i] == col) { \ 405 if (addv == ADD_VALUES) ap1[_i] += value; \ 406 else ap1[_i] = value; \ 407 goto a_noinsert; \ 408 } \ 409 } \ 410 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 411 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 412 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 413 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 414 N = nrow1++ - 1; a->nz++; high1++; \ 415 /* shift up all the later entries in this row */ \ 416 for (ii=N; ii>=_i; ii--) { \ 417 rp1[ii+1] = rp1[ii]; \ 418 ap1[ii+1] = ap1[ii]; \ 419 } \ 420 rp1[_i] = col; \ 421 ap1[_i] = value; \ 422 A->nonzerostate++;\ 423 a_noinsert: ; \ 424 ailen[row] = nrow1; \ 425 } 426 427 428 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 429 { \ 430 if (col <= lastcol2) low2 = 0; \ 431 else high2 = nrow2; \ 432 lastcol2 = col; \ 433 while (high2-low2 > 5) { \ 434 t = (low2+high2)/2; \ 435 if (rp2[t] > col) high2 = t; \ 436 else low2 = t; \ 437 } \ 438 for (_i=low2; _i<high2; _i++) { \ 439 if (rp2[_i] > col) break; \ 440 if (rp2[_i] == col) { \ 441 if (addv == ADD_VALUES) ap2[_i] += value; \ 442 else ap2[_i] = value; \ 443 goto b_noinsert; \ 444 } \ 445 } \ 446 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 447 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 448 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 449 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 450 N = nrow2++ - 1; b->nz++; high2++; \ 451 /* shift up all the later entries in this row */ \ 452 for (ii=N; ii>=_i; ii--) { \ 453 rp2[ii+1] = rp2[ii]; \ 454 ap2[ii+1] = ap2[ii]; \ 455 } \ 456 rp2[_i] = col; \ 457 ap2[_i] = value; \ 458 B->nonzerostate++; \ 459 b_noinsert: ; \ 460 bilen[row] = nrow2; \ 461 } 462 463 #undef __FUNCT__ 464 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 465 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 466 { 467 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 468 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = 
(Mat_SeqAIJ*)mat->B->data; 469 PetscErrorCode ierr; 470 PetscInt l,*garray = mat->garray,diag; 471 472 PetscFunctionBegin; 473 /* code only works for square matrices A */ 474 475 /* find size of row to the left of the diagonal part */ 476 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 477 row = row - diag; 478 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 479 if (garray[b->j[b->i[row]+l]] > diag) break; 480 } 481 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 482 483 /* diagonal part */ 484 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 485 486 /* right of diagonal part */ 487 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 488 PetscFunctionReturn(0); 489 } 490 491 #undef __FUNCT__ 492 #define __FUNCT__ "MatSetValues_MPIAIJ" 493 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 494 { 495 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 496 PetscScalar value; 497 PetscErrorCode ierr; 498 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 499 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 500 PetscBool roworiented = aij->roworiented; 501 502 /* Some Variables required in the macro */ 503 Mat A = aij->A; 504 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 505 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 506 MatScalar *aa = a->a; 507 PetscBool ignorezeroentries = a->ignorezeroentries; 508 Mat B = aij->B; 509 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 510 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 511 MatScalar *ba = b->a; 512 513 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 514 PetscInt nonew; 515 MatScalar *ap1,*ap2; 516 517 PetscFunctionBegin; 518 for (i=0; i<m; i++) { 519 if (im[i] < 0) continue; 520 #if defined(PETSC_USE_DEBUG) 521 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 522 #endif 523 if (im[i] >= rstart && im[i] < rend) { 524 row = im[i] - rstart; 525 lastcol1 = -1; 526 rp1 = aj + ai[row]; 527 ap1 = aa + ai[row]; 528 rmax1 = aimax[row]; 529 nrow1 = ailen[row]; 530 low1 = 0; 531 high1 = nrow1; 532 lastcol2 = -1; 533 rp2 = bj + bi[row]; 534 ap2 = ba + bi[row]; 535 rmax2 = bimax[row]; 536 nrow2 = bilen[row]; 537 low2 = 0; 538 high2 = nrow2; 539 540 for (j=0; j<n; j++) { 541 if (roworiented) value = v[i*n+j]; 542 else value = v[i+j*m]; 543 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 544 if (in[j] >= cstart && in[j] < cend) { 545 col = in[j] - cstart; 546 nonew = a->nonew; 547 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 548 } else if (in[j] < 0) continue; 549 #if defined(PETSC_USE_DEBUG) 550 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 551 #endif 552 else { 553 if (mat->was_assembled) { 554 if (!aij->colmap) { 555 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 556 } 557 #if defined(PETSC_USE_CTABLE) 558 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 559 col--; 560 #else 561 col = aij->colmap[in[j]] - 1; 562 #endif 563 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 564 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 565 col = in[j]; 566 /* 
Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 567 B = aij->B; 568 b = (Mat_SeqAIJ*)B->data; 569 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 570 rp2 = bj + bi[row]; 571 ap2 = ba + bi[row]; 572 rmax2 = bimax[row]; 573 nrow2 = bilen[row]; 574 low2 = 0; 575 high2 = nrow2; 576 bm = aij->B->rmap->n; 577 ba = b->a; 578 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 579 } else col = in[j]; 580 nonew = b->nonew; 581 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 582 } 583 } 584 } else { 585 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 586 if (!aij->donotstash) { 587 mat->assembled = PETSC_FALSE; 588 if (roworiented) { 589 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 590 } else { 591 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 592 } 593 } 594 } 595 } 596 PetscFunctionReturn(0); 597 } 598 599 #undef __FUNCT__ 600 #define __FUNCT__ "MatGetValues_MPIAIJ" 601 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 602 { 603 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 604 PetscErrorCode ierr; 605 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 606 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 607 608 PetscFunctionBegin; 609 for (i=0; i<m; i++) { 610 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 611 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 612 if (idxm[i] >= rstart && idxm[i] < rend) { 613 row = idxm[i] - rstart; 614 for (j=0; j<n; j++) { 615 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 616 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 617 if (idxn[j] >= cstart && idxn[j] < cend) { 618 col = idxn[j] - cstart; 619 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 620 } else { 621 if (!aij->colmap) { 622 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 623 } 624 #if defined(PETSC_USE_CTABLE) 625 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 626 col--; 627 #else 628 col = aij->colmap[idxn[j]] - 1; 629 #endif 630 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 631 else { 632 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 633 } 634 } 635 } 636 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 637 } 638 PetscFunctionReturn(0); 639 } 640 641 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 642 643 #undef __FUNCT__ 644 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 645 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 646 { 647 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 648 PetscErrorCode ierr; 649 PetscInt nstash,reallocs; 650 InsertMode addv; 651 652 PetscFunctionBegin; 653 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 654 655 /* make sure all processors are either in INSERTMODE or 
ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble.
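     For example (hypothetical): if one of three ranks has already disassembled (its
     was_assembled is PETSC_FALSE) while the other two are still assembled, the MPI_PROD
     reduction below yields other_disassembled == PETSC_FALSE on every rank, so the two
     still-assembled ranks call MatDisAssemble_MPIAIJ() as well and all ranks rebuild the
     off-diagonal block B consistently.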
*/ 708 /* 709 if nonzero structure of submatrix B cannot change then we know that 710 no processor disassembled thus we can skip this stuff 711 */ 712 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 713 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 714 if (mat->was_assembled && !other_disassembled) { 715 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 716 } 717 } 718 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 719 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 720 } 721 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 722 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 723 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 724 725 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 726 727 aij->rowvalues = 0; 728 729 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 730 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 731 732 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 733 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 734 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 735 ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 736 } 737 PetscFunctionReturn(0); 738 } 739 740 #undef __FUNCT__ 741 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 742 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 743 { 744 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 745 PetscErrorCode ierr; 746 747 PetscFunctionBegin; 748 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 749 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 750 PetscFunctionReturn(0); 751 } 752 753 #undef __FUNCT__ 754 #define __FUNCT__ "MatZeroRows_MPIAIJ" 755 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 756 { 757 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 758 PetscInt *owners = A->rmap->range; 759 PetscInt n = A->rmap->n; 760 PetscSF sf; 761 PetscInt *lrows; 762 PetscSFNode *rrows; 763 PetscInt r, p = 0, len = 0; 764 PetscErrorCode ierr; 765 766 PetscFunctionBegin; 767 /* Create SF where leaves are input rows and roots are owned rows */ 768 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 769 for (r = 0; r < n; ++r) lrows[r] = -1; 770 if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);} 771 for (r = 0; r < N; ++r) { 772 const PetscInt idx = rows[r]; 773 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 774 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 775 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 776 } 777 if (A->nooffproczerorows) { 778 if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank); 779 lrows[len++] = idx - owners[p]; 780 } else { 781 rrows[r].rank = p; 782 rrows[r].index = rows[r] - owners[p]; 783 } 784 } 785 if (!A->nooffproczerorows) { 786 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 787 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 788 /* Collect flags for rows to be zeroed */ 789 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr); 790 ierr = 
PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr); 791 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 792 /* Compress and put in row numbers */ 793 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 794 } 795 /* fix right hand side if needed */ 796 if (x && b) { 797 const PetscScalar *xx; 798 PetscScalar *bb; 799 800 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 801 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 802 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 803 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 804 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 805 } 806 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 807 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 808 if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) { 809 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 810 } else if (diag != 0.0) { 811 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 812 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 813 for (r = 0; r < len; ++r) { 814 const PetscInt row = lrows[r] + A->rmap->rstart; 815 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 816 } 817 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 818 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 819 } else { 820 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 821 } 822 ierr = PetscFree(lrows);CHKERRQ(ierr); 823 824 /* only change matrix nonzero state if pattern was allowed to be changed */ 825 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 826 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 827 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 828 } 829 PetscFunctionReturn(0); 830 } 831 832 #undef __FUNCT__ 833 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 834 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 835 { 836 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 837 PetscErrorCode ierr; 838 PetscMPIInt n = A->rmap->n; 839 PetscInt i,j,r,m,p = 0,len = 0; 840 PetscInt *lrows,*owners = A->rmap->range; 841 PetscSFNode *rrows; 842 PetscSF sf; 843 const PetscScalar *xx; 844 PetscScalar *bb,*mask; 845 Vec xmask,lmask; 846 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 847 const PetscInt *aj, *ii,*ridx; 848 PetscScalar *aa; 849 850 PetscFunctionBegin; 851 /* Create SF where leaves are input rows and roots are owned rows */ 852 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 853 for (r = 0; r < n; ++r) lrows[r] = -1; 854 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 855 for (r = 0; r < N; ++r) { 856 const PetscInt idx = rows[r]; 857 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 858 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 859 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 860 } 861 rrows[r].rank = p; 862 rrows[r].index = rows[r] - owners[p]; 863 } 864 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 865 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, 
PETSC_OWN_POINTER);CHKERRQ(ierr); 866 /* Collect flags for rows to be zeroed */ 867 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 868 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 869 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 870 /* Compress and put in row numbers */ 871 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 872 /* zero diagonal part of matrix */ 873 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 874 /* handle off diagonal part of matrix */ 875 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 876 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 877 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 878 for (i=0; i<len; i++) bb[lrows[i]] = 1; 879 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 880 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 881 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 882 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 883 if (x) { 884 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 885 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 886 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 887 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 888 } 889 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 890 /* remove zeroed rows of off diagonal matrix */ 891 ii = aij->i; 892 for (i=0; i<len; i++) { 893 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 894 } 895 /* loop over all elements of off process part of matrix zeroing removed columns*/ 896 if (aij->compressedrow.use) { 897 m = aij->compressedrow.nrows; 898 ii = aij->compressedrow.i; 899 ridx = aij->compressedrow.rindex; 900 for (i=0; i<m; i++) { 901 n = ii[i+1] - ii[i]; 902 aj = aij->j + ii[i]; 903 aa = aij->a + ii[i]; 904 905 for (j=0; j<n; j++) { 906 if (PetscAbsScalar(mask[*aj])) { 907 if (b) bb[*ridx] -= *aa*xx[*aj]; 908 *aa = 0.0; 909 } 910 aa++; 911 aj++; 912 } 913 ridx++; 914 } 915 } else { /* do not use compressed row format */ 916 m = l->B->rmap->n; 917 for (i=0; i<m; i++) { 918 n = ii[i+1] - ii[i]; 919 aj = aij->j + ii[i]; 920 aa = aij->a + ii[i]; 921 for (j=0; j<n; j++) { 922 if (PetscAbsScalar(mask[*aj])) { 923 if (b) bb[i] -= *aa*xx[*aj]; 924 *aa = 0.0; 925 } 926 aa++; 927 aj++; 928 } 929 } 930 } 931 if (x) { 932 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 933 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 934 } 935 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 936 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 937 ierr = PetscFree(lrows);CHKERRQ(ierr); 938 939 /* only change matrix nonzero state if pattern was allowed to be changed */ 940 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 941 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 942 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 943 } 944 PetscFunctionReturn(0); 945 } 946 947 #undef __FUNCT__ 948 #define __FUNCT__ "MatMult_MPIAIJ" 949 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 950 { 951 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 952 PetscErrorCode ierr; 953 PetscInt nt; 954 955 PetscFunctionBegin; 956 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 957 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 958 ierr = 
VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 959 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 960 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 961 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 962 PetscFunctionReturn(0); 963 } 964 965 #undef __FUNCT__ 966 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 967 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 968 { 969 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 970 PetscErrorCode ierr; 971 972 PetscFunctionBegin; 973 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 974 PetscFunctionReturn(0); 975 } 976 977 #undef __FUNCT__ 978 #define __FUNCT__ "MatMultAdd_MPIAIJ" 979 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 980 { 981 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 982 PetscErrorCode ierr; 983 984 PetscFunctionBegin; 985 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 986 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 987 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 988 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 989 PetscFunctionReturn(0); 990 } 991 992 #undef __FUNCT__ 993 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 994 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 995 { 996 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 997 PetscErrorCode ierr; 998 PetscBool merged; 999 1000 PetscFunctionBegin; 1001 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1002 /* do nondiagonal part */ 1003 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1004 if (!merged) { 1005 /* send it on its way */ 1006 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1007 /* do local part */ 1008 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1009 /* receive remote parts: note this assumes the values are not actually */ 1010 /* added in yy until the next line, */ 1011 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1012 } else { 1013 /* do local part */ 1014 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1015 /* send it on its way */ 1016 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1017 /* values actually were received in the Begin() but we need to call this nop */ 1018 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1019 } 1020 PetscFunctionReturn(0); 1021 } 1022 1023 #undef __FUNCT__ 1024 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1025 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1026 { 1027 MPI_Comm comm; 1028 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1029 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1030 IS Me,Notme; 1031 PetscErrorCode ierr; 1032 PetscInt M,N,first,last,*notme,i; 1033 PetscMPIInt size; 1034 1035 PetscFunctionBegin; 1036 /* Easy test: symmetric diagonal block */ 1037 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1038 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1039 if (!*f) PetscFunctionReturn(0); 1040 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1041 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1042 if (size == 1) PetscFunctionReturn(0); 1043 1044 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. 
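      For example (hypothetical layout): with two processes and an 8x8 pair of matrices split
      four rows per process, the second process has first = 4 and last = 8, so Me = {4,...,7}
      and Notme = {0,...,3}; the code below then extracts Aoff = Amat(Me,Notme) and
      Boff = Bmat(Notme,Me) and checks that Aoff is the transpose of Boff.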
*/ 1045 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1046 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1047 ierr = PetscMalloc1((N-last+first),¬me);CHKERRQ(ierr); 1048 for (i=0; i<first; i++) notme[i] = i; 1049 for (i=last; i<M; i++) notme[i-last+first] = i; 1050 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1051 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1052 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1053 Aoff = Aoffs[0]; 1054 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1055 Boff = Boffs[0]; 1056 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1057 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1058 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1059 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1060 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1061 ierr = PetscFree(notme);CHKERRQ(ierr); 1062 PetscFunctionReturn(0); 1063 } 1064 1065 #undef __FUNCT__ 1066 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1067 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1068 { 1069 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1070 PetscErrorCode ierr; 1071 1072 PetscFunctionBegin; 1073 /* do nondiagonal part */ 1074 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1075 /* send it on its way */ 1076 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1077 /* do local part */ 1078 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1079 /* receive remote parts */ 1080 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1081 PetscFunctionReturn(0); 1082 } 1083 1084 /* 1085 This only works correctly for square matrices where the subblock A->A is the 1086 diagonal block 1087 */ 1088 #undef __FUNCT__ 1089 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1090 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1091 { 1092 PetscErrorCode ierr; 1093 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1094 1095 PetscFunctionBegin; 1096 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1097 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1098 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1099 PetscFunctionReturn(0); 1100 } 1101 1102 #undef __FUNCT__ 1103 #define __FUNCT__ "MatScale_MPIAIJ" 1104 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1105 { 1106 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1107 PetscErrorCode ierr; 1108 1109 PetscFunctionBegin; 1110 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1111 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1112 PetscFunctionReturn(0); 1113 } 1114 1115 #undef __FUNCT__ 1116 #define __FUNCT__ "MatDestroy_Redundant" 1117 PetscErrorCode MatDestroy_Redundant(Mat_Redundant **redundant) 1118 { 1119 PetscErrorCode ierr; 1120 Mat_Redundant *redund = *redundant; 1121 PetscInt i; 1122 1123 PetscFunctionBegin; 1124 *redundant = NULL; 1125 if (redund){ 1126 if (redund->matseq) { /* via MatGetSubMatrices() */ 1127 ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr); 1128 ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr); 1129 ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr); 1130 ierr = PetscFree(redund->matseq);CHKERRQ(ierr); 1131 } else { 1132 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 1133 ierr = 
PetscFree(redund->sbuf_j);CHKERRQ(ierr); 1134 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 1135 for (i=0; i<redund->nrecvs; i++) { 1136 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 1137 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 1138 } 1139 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 1140 } 1141 1142 if (redund->psubcomm) { 1143 ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr); 1144 } 1145 ierr = PetscFree(redund);CHKERRQ(ierr); 1146 } 1147 PetscFunctionReturn(0); 1148 } 1149 1150 #undef __FUNCT__ 1151 #define __FUNCT__ "MatDestroy_MPIAIJ" 1152 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1153 { 1154 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1155 PetscErrorCode ierr; 1156 1157 PetscFunctionBegin; 1158 #if defined(PETSC_USE_LOG) 1159 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1160 #endif 1161 ierr = MatDestroy_Redundant(&aij->redundant);CHKERRQ(ierr); 1162 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1163 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1164 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1165 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1166 #if defined(PETSC_USE_CTABLE) 1167 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1168 #else 1169 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1170 #endif 1171 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1172 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1173 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1174 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1175 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1176 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1177 1178 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1179 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1180 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1181 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1182 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1183 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1184 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1185 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1186 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1187 #if defined(PETSC_HAVE_ELEMENTAL) 1188 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1189 #endif 1190 PetscFunctionReturn(0); 1191 } 1192 1193 #undef __FUNCT__ 1194 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1195 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1196 { 1197 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1198 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1199 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1200 PetscErrorCode ierr; 1201 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1202 int fd; 1203 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1204 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1205 PetscScalar *column_values; 1206 PetscInt message_count,flowcontrolcount; 1207 FILE *file; 1208 1209 PetscFunctionBegin; 1210 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1211 ierr = 
MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1212 nz = A->nz + B->nz; 1213 if (!rank) { 1214 header[0] = MAT_FILE_CLASSID; 1215 header[1] = mat->rmap->N; 1216 header[2] = mat->cmap->N; 1217 1218 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1219 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1220 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1221 /* get largest number of rows any processor has */ 1222 rlen = mat->rmap->n; 1223 range = mat->rmap->range; 1224 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1225 } else { 1226 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1227 rlen = mat->rmap->n; 1228 } 1229 1230 /* load up the local row counts */ 1231 ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr); 1232 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1233 1234 /* store the row lengths to the file */ 1235 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1236 if (!rank) { 1237 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1238 for (i=1; i<size; i++) { 1239 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1240 rlen = range[i+1] - range[i]; 1241 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1242 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1243 } 1244 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1245 } else { 1246 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1247 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1248 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1249 } 1250 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1251 1252 /* load up the local column indices */ 1253 nzmax = nz; /* th processor needs space a largest processor needs */ 1254 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1255 ierr = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr); 1256 cnt = 0; 1257 for (i=0; i<mat->rmap->n; i++) { 1258 for (j=B->i[i]; j<B->i[i+1]; j++) { 1259 if ((col = garray[B->j[j]]) > cstart) break; 1260 column_indices[cnt++] = col; 1261 } 1262 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1263 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1264 } 1265 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1266 1267 /* store the column indices to the file */ 1268 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1269 if (!rank) { 1270 MPI_Status status; 1271 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1272 for (i=1; i<size; i++) { 1273 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1274 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1275 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1276 ierr = 
MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1277 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1278 } 1279 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1280 } else { 1281 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1282 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1283 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1284 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1285 } 1286 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1287 1288 /* load up the local column values */ 1289 ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr); 1290 cnt = 0; 1291 for (i=0; i<mat->rmap->n; i++) { 1292 for (j=B->i[i]; j<B->i[i+1]; j++) { 1293 if (garray[B->j[j]] > cstart) break; 1294 column_values[cnt++] = B->a[j]; 1295 } 1296 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1297 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1298 } 1299 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1300 1301 /* store the column values to the file */ 1302 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1303 if (!rank) { 1304 MPI_Status status; 1305 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1306 for (i=1; i<size; i++) { 1307 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1308 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1309 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1310 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1311 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1312 } 1313 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1314 } else { 1315 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1316 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1317 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1318 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1319 } 1320 ierr = PetscFree(column_values);CHKERRQ(ierr); 1321 1322 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1323 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1324 PetscFunctionReturn(0); 1325 } 1326 1327 #include <petscdraw.h> 1328 #undef __FUNCT__ 1329 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1330 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1331 { 1332 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1333 PetscErrorCode ierr; 1334 PetscMPIInt rank = aij->rank,size = aij->size; 1335 PetscBool isdraw,iascii,isbinary; 1336 PetscViewer sviewer; 1337 PetscViewerFormat format; 1338 1339 PetscFunctionBegin; 1340 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1341 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1342 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1343 if (iascii) { 1344 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1345 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1346 MatInfo info; 1347 PetscBool inodes; 1348 1349 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1350 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1351 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1352 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1353 if (!inodes) { 1354 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1355 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1356 } else { 1357 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1358 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1359 } 1360 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1361 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1362 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1363 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1364 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1365 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1366 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1367 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1368 PetscFunctionReturn(0); 1369 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1370 PetscInt inodecount,inodelimit,*inodes; 1371 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1372 if (inodes) { 1373 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1374 } else { 1375 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1376 } 1377 PetscFunctionReturn(0); 1378 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1379 PetscFunctionReturn(0); 1380 } 1381 } else if (isbinary) { 1382 if (size == 1) { 1383 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1384 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1385 } else { 1386 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1387 } 1388 PetscFunctionReturn(0); 1389 } else if (isdraw) { 1390 PetscDraw draw; 1391 PetscBool isnull; 1392 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1393 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1394 } 1395 1396 { 1397 /* assemble the entire matrix onto first processor. 
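         (Note: the temporary matrix A created below is given all of its rows on rank 0 and none
         elsewhere, so while every process participates in building and assembling it, only rank 0
         ends up holding the full copy that is handed to the sequential viewer; this path is only
         practical for matrices small enough to fit in the memory of one process.)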
*/ 1398 Mat A; 1399 Mat_SeqAIJ *Aloc; 1400 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1401 MatScalar *a; 1402 1403 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1404 if (!rank) { 1405 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1406 } else { 1407 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1408 } 1409 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1410 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1411 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1412 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1413 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1414 1415 /* copy over the A part */ 1416 Aloc = (Mat_SeqAIJ*)aij->A->data; 1417 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1418 row = mat->rmap->rstart; 1419 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1420 for (i=0; i<m; i++) { 1421 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1422 row++; 1423 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1424 } 1425 aj = Aloc->j; 1426 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1427 1428 /* copy over the B part */ 1429 Aloc = (Mat_SeqAIJ*)aij->B->data; 1430 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1431 row = mat->rmap->rstart; 1432 ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr); 1433 ct = cols; 1434 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1435 for (i=0; i<m; i++) { 1436 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1437 row++; 1438 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1439 } 1440 ierr = PetscFree(ct);CHKERRQ(ierr); 1441 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1442 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1443 /* 1444 Everyone has to call to draw the matrix since the graphics waits are 1445 synchronized across all processors that share the PetscDraw object 1446 */ 1447 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1448 if (!rank) { 1449 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1450 } 1451 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1452 ierr = MatDestroy(&A);CHKERRQ(ierr); 1453 } 1454 PetscFunctionReturn(0); 1455 } 1456 1457 #undef __FUNCT__ 1458 #define __FUNCT__ "MatView_MPIAIJ" 1459 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1460 { 1461 PetscErrorCode ierr; 1462 PetscBool iascii,isdraw,issocket,isbinary; 1463 1464 PetscFunctionBegin; 1465 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1466 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1467 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1468 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1469 if (iascii || isdraw || isbinary || issocket) { 1470 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1471 } 1472 PetscFunctionReturn(0); 1473 } 1474 1475 #undef __FUNCT__ 1476 #define __FUNCT__ "MatSOR_MPIAIJ" 1477 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1478 { 1479 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1480 PetscErrorCode ierr; 1481 Vec bb1 = 0; 1482 PetscBool hasop; 1483 1484 PetscFunctionBegin; 1485 if (flag == SOR_APPLY_UPPER) { 1486 ierr 
= (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1487 PetscFunctionReturn(0); 1488 } 1489 1490 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1491 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1492 } 1493 1494 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1495 if (flag & SOR_ZERO_INITIAL_GUESS) { 1496 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1497 its--; 1498 } 1499 1500 while (its--) { 1501 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1502 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1503 1504 /* update rhs: bb1 = bb - B*x */ 1505 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1506 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1507 1508 /* local sweep */ 1509 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1510 } 1511 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1512 if (flag & SOR_ZERO_INITIAL_GUESS) { 1513 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1514 its--; 1515 } 1516 while (its--) { 1517 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1518 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1519 1520 /* update rhs: bb1 = bb - B*x */ 1521 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1522 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1523 1524 /* local sweep */ 1525 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1526 } 1527 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1528 if (flag & SOR_ZERO_INITIAL_GUESS) { 1529 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1530 its--; 1531 } 1532 while (its--) { 1533 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1534 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1535 1536 /* update rhs: bb1 = bb - B*x */ 1537 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1538 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1539 1540 /* local sweep */ 1541 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1542 } 1543 } else if (flag & SOR_EISENSTAT) { 1544 Vec xx1; 1545 1546 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1547 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1548 1549 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1550 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1551 if (!mat->diag) { 1552 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1553 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1554 } 1555 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1556 if (hasop) { 1557 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1558 } else { 1559 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1560 } 1561 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1562 1563 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1564 1565 /* local sweep */ 1566 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | 
SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1567 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1568 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1569 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1570 1571 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1572 PetscFunctionReturn(0); 1573 } 1574 1575 #undef __FUNCT__ 1576 #define __FUNCT__ "MatPermute_MPIAIJ" 1577 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1578 { 1579 Mat aA,aB,Aperm; 1580 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1581 PetscScalar *aa,*ba; 1582 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1583 PetscSF rowsf,sf; 1584 IS parcolp = NULL; 1585 PetscBool done; 1586 PetscErrorCode ierr; 1587 1588 PetscFunctionBegin; 1589 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1590 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1591 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1592 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1593 1594 /* Invert row permutation to find out where my rows should go */ 1595 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1596 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1597 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1598 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1599 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1600 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1601 1602 /* Invert column permutation to find out where my columns should go */ 1603 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1604 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1605 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1606 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1607 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1608 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1609 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1610 1611 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1612 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1613 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1614 1615 /* Find out where my gcols should go */ 1616 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1617 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1618 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1619 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1620 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1621 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1622 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1623 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1624 1625 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1626 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1627 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1628 for (i=0; i<m; i++) { 1629 PetscInt row = rdest[i],rowner; 1630 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1631 for (j=ai[i]; j<ai[i+1]; j++) { 1632 PetscInt cowner,col = cdest[aj[j]]; 1633 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1634 if (rowner == cowner) dnnz[i]++; 1635 
else onnz[i]++; 1636 } 1637 for (j=bi[i]; j<bi[i+1]; j++) { 1638 PetscInt cowner,col = gcdest[bj[j]]; 1639 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1640 if (rowner == cowner) dnnz[i]++; 1641 else onnz[i]++; 1642 } 1643 } 1644 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1645 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1646 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1647 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1648 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1649 1650 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1651 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1652 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1653 for (i=0; i<m; i++) { 1654 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1655 PetscInt j0,rowlen; 1656 rowlen = ai[i+1] - ai[i]; 1657 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1658 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1659 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1660 } 1661 rowlen = bi[i+1] - bi[i]; 1662 for (j0=j=0; j<rowlen; j0=j) { 1663 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1664 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1665 } 1666 } 1667 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1668 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1669 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1670 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1671 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1672 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1673 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1674 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1675 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1676 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1677 *B = Aperm; 1678 PetscFunctionReturn(0); 1679 } 1680 1681 #undef __FUNCT__ 1682 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1683 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1684 { 1685 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1686 Mat A = mat->A,B = mat->B; 1687 PetscErrorCode ierr; 1688 PetscReal isend[5],irecv[5]; 1689 1690 PetscFunctionBegin; 1691 info->block_size = 1.0; 1692 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1693 1694 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1695 isend[3] = info->memory; isend[4] = info->mallocs; 1696 1697 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1698 1699 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1700 isend[3] += info->memory; isend[4] += info->mallocs; 1701 if (flag == MAT_LOCAL) { 1702 info->nz_used = isend[0]; 1703 info->nz_allocated = isend[1]; 1704 info->nz_unneeded = isend[2]; 1705 info->memory = isend[3]; 1706 info->mallocs = isend[4]; 1707 } else if (flag == MAT_GLOBAL_MAX) { 1708 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1709 1710 info->nz_used = irecv[0]; 1711 info->nz_allocated = irecv[1]; 1712 info->nz_unneeded = irecv[2]; 1713 info->memory = irecv[3]; 1714 info->mallocs = irecv[4]; 1715 } else 
if (flag == MAT_GLOBAL_SUM) { 1716 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1717 1718 info->nz_used = irecv[0]; 1719 info->nz_allocated = irecv[1]; 1720 info->nz_unneeded = irecv[2]; 1721 info->memory = irecv[3]; 1722 info->mallocs = irecv[4]; 1723 } 1724 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1725 info->fill_ratio_needed = 0; 1726 info->factor_mallocs = 0; 1727 PetscFunctionReturn(0); 1728 } 1729 1730 #undef __FUNCT__ 1731 #define __FUNCT__ "MatSetOption_MPIAIJ" 1732 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1733 { 1734 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1735 PetscErrorCode ierr; 1736 1737 PetscFunctionBegin; 1738 switch (op) { 1739 case MAT_NEW_NONZERO_LOCATIONS: 1740 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1741 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1742 case MAT_KEEP_NONZERO_PATTERN: 1743 case MAT_NEW_NONZERO_LOCATION_ERR: 1744 case MAT_USE_INODES: 1745 case MAT_IGNORE_ZERO_ENTRIES: 1746 MatCheckPreallocated(A,1); 1747 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1748 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1749 break; 1750 case MAT_ROW_ORIENTED: 1751 a->roworiented = flg; 1752 1753 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1754 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1755 break; 1756 case MAT_NEW_DIAGONALS: 1757 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1758 break; 1759 case MAT_IGNORE_OFF_PROC_ENTRIES: 1760 a->donotstash = flg; 1761 break; 1762 case MAT_SPD: 1763 A->spd_set = PETSC_TRUE; 1764 A->spd = flg; 1765 if (flg) { 1766 A->symmetric = PETSC_TRUE; 1767 A->structurally_symmetric = PETSC_TRUE; 1768 A->symmetric_set = PETSC_TRUE; 1769 A->structurally_symmetric_set = PETSC_TRUE; 1770 } 1771 break; 1772 case MAT_SYMMETRIC: 1773 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1774 break; 1775 case MAT_STRUCTURALLY_SYMMETRIC: 1776 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1777 break; 1778 case MAT_HERMITIAN: 1779 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1780 break; 1781 case MAT_SYMMETRY_ETERNAL: 1782 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1783 break; 1784 default: 1785 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1786 } 1787 PetscFunctionReturn(0); 1788 } 1789 1790 #undef __FUNCT__ 1791 #define __FUNCT__ "MatGetRow_MPIAIJ" 1792 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1793 { 1794 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1795 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1796 PetscErrorCode ierr; 1797 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1798 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1799 PetscInt *cmap,*idx_p; 1800 1801 PetscFunctionBegin; 1802 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1803 mat->getrowactive = PETSC_TRUE; 1804 1805 if (!mat->rowvalues && (idx || v)) { 1806 /* 1807 allocate enough space to hold information from the longest row. 
1808 */ 1809 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1810 PetscInt max = 1,tmp; 1811 for (i=0; i<matin->rmap->n; i++) { 1812 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1813 if (max < tmp) max = tmp; 1814 } 1815 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1816 } 1817 1818 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1819 lrow = row - rstart; 1820 1821 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1822 if (!v) {pvA = 0; pvB = 0;} 1823 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1824 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1825 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1826 nztot = nzA + nzB; 1827 1828 cmap = mat->garray; 1829 if (v || idx) { 1830 if (nztot) { 1831 /* Sort by increasing column numbers, assuming A and B already sorted */ 1832 PetscInt imark = -1; 1833 if (v) { 1834 *v = v_p = mat->rowvalues; 1835 for (i=0; i<nzB; i++) { 1836 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1837 else break; 1838 } 1839 imark = i; 1840 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1841 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1842 } 1843 if (idx) { 1844 *idx = idx_p = mat->rowindices; 1845 if (imark > -1) { 1846 for (i=0; i<imark; i++) { 1847 idx_p[i] = cmap[cworkB[i]]; 1848 } 1849 } else { 1850 for (i=0; i<nzB; i++) { 1851 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1852 else break; 1853 } 1854 imark = i; 1855 } 1856 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1857 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1858 } 1859 } else { 1860 if (idx) *idx = 0; 1861 if (v) *v = 0; 1862 } 1863 } 1864 *nz = nztot; 1865 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1866 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1867 PetscFunctionReturn(0); 1868 } 1869 1870 #undef __FUNCT__ 1871 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1872 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1873 { 1874 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1875 1876 PetscFunctionBegin; 1877 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1878 aij->getrowactive = PETSC_FALSE; 1879 PetscFunctionReturn(0); 1880 } 1881 1882 #undef __FUNCT__ 1883 #define __FUNCT__ "MatNorm_MPIAIJ" 1884 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1885 { 1886 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1887 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1888 PetscErrorCode ierr; 1889 PetscInt i,j,cstart = mat->cmap->rstart; 1890 PetscReal sum = 0.0; 1891 MatScalar *v; 1892 1893 PetscFunctionBegin; 1894 if (aij->size == 1) { 1895 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1896 } else { 1897 if (type == NORM_FROBENIUS) { 1898 v = amat->a; 1899 for (i=0; i<amat->nz; i++) { 1900 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1901 } 1902 v = bmat->a; 1903 for (i=0; i<bmat->nz; i++) { 1904 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1905 } 1906 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1907 *norm = PetscSqrtReal(*norm); 1908 } else if (type == NORM_1) { /* max column norm */ 1909 PetscReal *tmp,*tmp2; 1910 PetscInt *jj,*garray = aij->garray; 1911 ierr = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr); 1912 ierr = 
PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr); 1913 *norm = 0.0; 1914 v = amat->a; jj = amat->j; 1915 for (j=0; j<amat->nz; j++) { 1916 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1917 } 1918 v = bmat->a; jj = bmat->j; 1919 for (j=0; j<bmat->nz; j++) { 1920 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1921 } 1922 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1923 for (j=0; j<mat->cmap->N; j++) { 1924 if (tmp2[j] > *norm) *norm = tmp2[j]; 1925 } 1926 ierr = PetscFree(tmp);CHKERRQ(ierr); 1927 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1928 } else if (type == NORM_INFINITY) { /* max row norm */ 1929 PetscReal ntemp = 0.0; 1930 for (j=0; j<aij->A->rmap->n; j++) { 1931 v = amat->a + amat->i[j]; 1932 sum = 0.0; 1933 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1934 sum += PetscAbsScalar(*v); v++; 1935 } 1936 v = bmat->a + bmat->i[j]; 1937 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1938 sum += PetscAbsScalar(*v); v++; 1939 } 1940 if (sum > ntemp) ntemp = sum; 1941 } 1942 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1943 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1944 } 1945 PetscFunctionReturn(0); 1946 } 1947 1948 #undef __FUNCT__ 1949 #define __FUNCT__ "MatTranspose_MPIAIJ" 1950 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1951 { 1952 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1953 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1954 PetscErrorCode ierr; 1955 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1956 PetscInt cstart = A->cmap->rstart,ncol; 1957 Mat B; 1958 MatScalar *array; 1959 1960 PetscFunctionBegin; 1961 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1962 1963 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1964 ai = Aloc->i; aj = Aloc->j; 1965 bi = Bloc->i; bj = Bloc->j; 1966 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1967 PetscInt *d_nnz,*g_nnz,*o_nnz; 1968 PetscSFNode *oloc; 1969 PETSC_UNUSED PetscSF sf; 1970 1971 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1972 /* compute d_nnz for preallocation */ 1973 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1974 for (i=0; i<ai[ma]; i++) { 1975 d_nnz[aj[i]]++; 1976 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1977 } 1978 /* compute local off-diagonal contributions */ 1979 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1980 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1981 /* map those to global */ 1982 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1983 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1984 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1985 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1986 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1987 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1988 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1989 1990 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1991 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1992 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1993 ierr = 
MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1994 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1995 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1996 } else { 1997 B = *matout; 1998 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1999 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2000 } 2001 2002 /* copy over the A part */ 2003 array = Aloc->a; 2004 row = A->rmap->rstart; 2005 for (i=0; i<ma; i++) { 2006 ncol = ai[i+1]-ai[i]; 2007 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2008 row++; 2009 array += ncol; aj += ncol; 2010 } 2011 aj = Aloc->j; 2012 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2013 2014 /* copy over the B part */ 2015 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2016 array = Bloc->a; 2017 row = A->rmap->rstart; 2018 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2019 cols_tmp = cols; 2020 for (i=0; i<mb; i++) { 2021 ncol = bi[i+1]-bi[i]; 2022 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2023 row++; 2024 array += ncol; cols_tmp += ncol; 2025 } 2026 ierr = PetscFree(cols);CHKERRQ(ierr); 2027 2028 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2029 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2030 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2031 *matout = B; 2032 } else { 2033 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2034 } 2035 PetscFunctionReturn(0); 2036 } 2037 2038 #undef __FUNCT__ 2039 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2040 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2041 { 2042 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2043 Mat a = aij->A,b = aij->B; 2044 PetscErrorCode ierr; 2045 PetscInt s1,s2,s3; 2046 2047 PetscFunctionBegin; 2048 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2049 if (rr) { 2050 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2051 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2052 /* Overlap communication with computation. 
*/ 2053 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2054 } 2055 if (ll) { 2056 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2057 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2058 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2059 } 2060 /* scale the diagonal block */ 2061 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2062 2063 if (rr) { 2064 /* Do a scatter end and then right scale the off-diagonal block */ 2065 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2066 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2067 } 2068 PetscFunctionReturn(0); 2069 } 2070 2071 #undef __FUNCT__ 2072 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2073 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2074 { 2075 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2076 PetscErrorCode ierr; 2077 2078 PetscFunctionBegin; 2079 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2080 PetscFunctionReturn(0); 2081 } 2082 2083 #undef __FUNCT__ 2084 #define __FUNCT__ "MatEqual_MPIAIJ" 2085 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2086 { 2087 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2088 Mat a,b,c,d; 2089 PetscBool flg; 2090 PetscErrorCode ierr; 2091 2092 PetscFunctionBegin; 2093 a = matA->A; b = matA->B; 2094 c = matB->A; d = matB->B; 2095 2096 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2097 if (flg) { 2098 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2099 } 2100 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2101 PetscFunctionReturn(0); 2102 } 2103 2104 #undef __FUNCT__ 2105 #define __FUNCT__ "MatCopy_MPIAIJ" 2106 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2107 { 2108 PetscErrorCode ierr; 2109 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2110 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2111 2112 PetscFunctionBegin; 2113 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2114 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2115 /* because of the column compression in the off-processor part of the matrix a->B, 2116 the number of columns in a->B and b->B may be different, hence we cannot call 2117 the MatCopy() directly on the two parts. If need be, we can provide a more 2118 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2119 then copying the submatrices */ 2120 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2121 } else { 2122 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2123 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2124 } 2125 PetscFunctionReturn(0); 2126 } 2127 2128 #undef __FUNCT__ 2129 #define __FUNCT__ "MatSetUp_MPIAIJ" 2130 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2131 { 2132 PetscErrorCode ierr; 2133 2134 PetscFunctionBegin; 2135 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2136 PetscFunctionReturn(0); 2137 } 2138 2139 /* 2140 Computes the number of nonzeros per row needed for preallocation when X and Y 2141 have different nonzero structure. 
2142 */ 2143 #undef __FUNCT__ 2144 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private" 2145 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2146 { 2147 PetscInt i,j,k,nzx,nzy; 2148 2149 PetscFunctionBegin; 2150 /* Set the number of nonzeros in the new matrix */ 2151 for (i=0; i<m; i++) { 2152 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2153 nzx = xi[i+1] - xi[i]; 2154 nzy = yi[i+1] - yi[i]; 2155 nnz[i] = 0; 2156 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2157 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2158 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2159 nnz[i]++; 2160 } 2161 for (; k<nzy; k++) nnz[i]++; 2162 } 2163 PetscFunctionReturn(0); 2164 } 2165 2166 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2167 #undef __FUNCT__ 2168 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2169 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2170 { 2171 PetscErrorCode ierr; 2172 PetscInt m = Y->rmap->N; 2173 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2174 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2175 2176 PetscFunctionBegin; 2177 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2178 PetscFunctionReturn(0); 2179 } 2180 2181 #undef __FUNCT__ 2182 #define __FUNCT__ "MatAXPY_MPIAIJ" 2183 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2184 { 2185 PetscErrorCode ierr; 2186 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2187 PetscBLASInt bnz,one=1; 2188 Mat_SeqAIJ *x,*y; 2189 2190 PetscFunctionBegin; 2191 if (str == SAME_NONZERO_PATTERN) { 2192 PetscScalar alpha = a; 2193 x = (Mat_SeqAIJ*)xx->A->data; 2194 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2195 y = (Mat_SeqAIJ*)yy->A->data; 2196 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2197 x = (Mat_SeqAIJ*)xx->B->data; 2198 y = (Mat_SeqAIJ*)yy->B->data; 2199 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2200 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2201 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2202 } else { 2203 Mat B; 2204 PetscInt *nnz_d,*nnz_o; 2205 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2206 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2207 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2208 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2209 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2210 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2211 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2212 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2213 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2214 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2215 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2216 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2217 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2218 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2219 } 2220 PetscFunctionReturn(0); 2221 } 2222 2223 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2224 2225 #undef __FUNCT__ 2226 #define __FUNCT__ 
"MatConjugate_MPIAIJ" 2227 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2228 { 2229 #if defined(PETSC_USE_COMPLEX) 2230 PetscErrorCode ierr; 2231 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2232 2233 PetscFunctionBegin; 2234 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2235 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2236 #else 2237 PetscFunctionBegin; 2238 #endif 2239 PetscFunctionReturn(0); 2240 } 2241 2242 #undef __FUNCT__ 2243 #define __FUNCT__ "MatRealPart_MPIAIJ" 2244 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2245 { 2246 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2247 PetscErrorCode ierr; 2248 2249 PetscFunctionBegin; 2250 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2251 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2252 PetscFunctionReturn(0); 2253 } 2254 2255 #undef __FUNCT__ 2256 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2257 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2258 { 2259 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2260 PetscErrorCode ierr; 2261 2262 PetscFunctionBegin; 2263 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2264 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2265 PetscFunctionReturn(0); 2266 } 2267 2268 #if defined(PETSC_HAVE_PBGL) 2269 2270 #include <boost/parallel/mpi/bsp_process_group.hpp> 2271 #include <boost/graph/distributed/ilu_default_graph.hpp> 2272 #include <boost/graph/distributed/ilu_0_block.hpp> 2273 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2274 #include <boost/graph/distributed/petsc/interface.hpp> 2275 #include <boost/multi_array.hpp> 2276 #include <boost/parallel/distributed_property_map->hpp> 2277 2278 #undef __FUNCT__ 2279 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2280 /* 2281 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2282 */ 2283 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2284 { 2285 namespace petsc = boost::distributed::petsc; 2286 2287 namespace graph_dist = boost::graph::distributed; 2288 using boost::graph::distributed::ilu_default::process_group_type; 2289 using boost::graph::ilu_permuted; 2290 2291 PetscBool row_identity, col_identity; 2292 PetscContainer c; 2293 PetscInt m, n, M, N; 2294 PetscErrorCode ierr; 2295 2296 PetscFunctionBegin; 2297 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2298 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2299 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2300 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2301 2302 process_group_type pg; 2303 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2304 lgraph_type *lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2305 lgraph_type& level_graph = *lgraph_p; 2306 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2307 2308 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2309 ilu_permuted(level_graph); 2310 2311 /* put together the new matrix */ 2312 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2313 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2314 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2315 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2316 ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr); 2317 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2318 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 
2319 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2320 2321 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2322 ierr = PetscContainerSetPointer(c, lgraph_p); 2323 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2324 ierr = PetscContainerDestroy(&c); 2325 PetscFunctionReturn(0); 2326 } 2327 2328 #undef __FUNCT__ 2329 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2330 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2331 { 2332 PetscFunctionBegin; 2333 PetscFunctionReturn(0); 2334 } 2335 2336 #undef __FUNCT__ 2337 #define __FUNCT__ "MatSolve_MPIAIJ" 2338 /* 2339 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2340 */ 2341 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2342 { 2343 namespace graph_dist = boost::graph::distributed; 2344 2345 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2346 lgraph_type *lgraph_p; 2347 PetscContainer c; 2348 PetscErrorCode ierr; 2349 2350 PetscFunctionBegin; 2351 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2352 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2353 ierr = VecCopy(b, x);CHKERRQ(ierr); 2354 2355 PetscScalar *array_x; 2356 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2357 PetscInt sx; 2358 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2359 2360 PetscScalar *array_b; 2361 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2362 PetscInt sb; 2363 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2364 2365 lgraph_type& level_graph = *lgraph_p; 2366 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2367 2368 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2369 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2370 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2371 2372 typedef boost::iterator_property_map<array_ref_type::iterator, 2373 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2374 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2375 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2376 2377 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2378 PetscFunctionReturn(0); 2379 } 2380 #endif 2381 2382 2383 #undef __FUNCT__ 2384 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced" 2385 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2386 { 2387 PetscMPIInt rank,size; 2388 MPI_Comm comm; 2389 PetscErrorCode ierr; 2390 PetscInt nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N; 2391 PetscMPIInt *send_rank= NULL,*recv_rank=NULL,subrank,subsize; 2392 PetscInt *rowrange = mat->rmap->range; 2393 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2394 Mat A = aij->A,B=aij->B,C=*matredundant; 2395 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2396 PetscScalar *sbuf_a; 2397 PetscInt nzlocal=a->nz+b->nz; 2398 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2399 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray; 2400 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2401 MatScalar *aworkA,*aworkB; 2402 PetscScalar *vals; 2403 PetscMPIInt tag1,tag2,tag3,imdex; 2404 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2405 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2406 MPI_Status recv_status,*send_status; 2407 PetscInt 
*sbuf_nz=NULL,*rbuf_nz=NULL,count; 2408 PetscInt **rbuf_j=NULL; 2409 PetscScalar **rbuf_a=NULL; 2410 Mat_Redundant *redund =NULL; 2411 2412 PetscFunctionBegin; 2413 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2414 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2415 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2416 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2417 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2418 2419 if (reuse == MAT_REUSE_MATRIX) { 2420 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2421 if (subsize == 1) { 2422 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2423 redund = c->redundant; 2424 } else { 2425 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2426 redund = c->redundant; 2427 } 2428 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2429 2430 nsends = redund->nsends; 2431 nrecvs = redund->nrecvs; 2432 send_rank = redund->send_rank; 2433 recv_rank = redund->recv_rank; 2434 sbuf_nz = redund->sbuf_nz; 2435 rbuf_nz = redund->rbuf_nz; 2436 sbuf_j = redund->sbuf_j; 2437 sbuf_a = redund->sbuf_a; 2438 rbuf_j = redund->rbuf_j; 2439 rbuf_a = redund->rbuf_a; 2440 } 2441 2442 if (reuse == MAT_INITIAL_MATRIX) { 2443 PetscInt nleftover,np_subcomm; 2444 2445 /* get the destination processors' id send_rank, nsends and nrecvs */ 2446 ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr); 2447 2448 np_subcomm = size/nsubcomm; 2449 nleftover = size - nsubcomm*np_subcomm; 2450 2451 /* block of codes below is specific for INTERLACED */ 2452 /* ------------------------------------------------*/ 2453 nsends = 0; nrecvs = 0; 2454 for (i=0; i<size; i++) { 2455 if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */ 2456 send_rank[nsends++] = i; 2457 recv_rank[nrecvs++] = i; 2458 } 2459 } 2460 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2461 i = size-nleftover-1; 2462 j = 0; 2463 while (j < nsubcomm - nleftover) { 2464 send_rank[nsends++] = i; 2465 i--; j++; 2466 } 2467 } 2468 2469 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2470 for (i=0; i<nleftover; i++) { 2471 recv_rank[nrecvs++] = size-nleftover+i; 2472 } 2473 } 2474 /*----------------------------------------------*/ 2475 2476 /* allocate sbuf_j, sbuf_a */ 2477 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2478 ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr); 2479 ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr); 2480 /* 2481 ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr); 2482 ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr); 2483 */ 2484 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2485 2486 /* copy mat's local entries into the buffers */ 2487 if (reuse == MAT_INITIAL_MATRIX) { 2488 rownz_max = 0; 2489 rptr = sbuf_j; 2490 cols = sbuf_j + rend-rstart + 1; 2491 vals = sbuf_a; 2492 rptr[0] = 0; 2493 for (i=0; i<rend-rstart; i++) { 2494 row = i + rstart; 2495 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2496 ncols = nzA + nzB; 2497 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2498 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2499 /* load the column indices for this row into cols */ 2500 lwrite = 0; 2501 for (l=0; l<nzB; l++) { 2502 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2503 vals[lwrite] = aworkB[l]; 2504 cols[lwrite++] = ctmp; 2505 } 2506 } 
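/* entries of the off-diagonal block with global column < cstart were packed first; next come
   the diagonal-block entries (shifted to global numbering by cstart) and then the off-diagonal
   entries with global column >= cend, so each packed row is stored in ascending global column order */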
2507 for (l=0; l<nzA; l++) { 2508 vals[lwrite] = aworkA[l]; 2509 cols[lwrite++] = cstart + cworkA[l]; 2510 } 2511 for (l=0; l<nzB; l++) { 2512 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2513 vals[lwrite] = aworkB[l]; 2514 cols[lwrite++] = ctmp; 2515 } 2516 } 2517 vals += ncols; 2518 cols += ncols; 2519 rptr[i+1] = rptr[i] + ncols; 2520 if (rownz_max < ncols) rownz_max = ncols; 2521 } 2522 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2523 } else { /* only copy matrix values into sbuf_a */ 2524 rptr = sbuf_j; 2525 vals = sbuf_a; 2526 rptr[0] = 0; 2527 for (i=0; i<rend-rstart; i++) { 2528 row = i + rstart; 2529 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2530 ncols = nzA + nzB; 2531 cworkB = b->j + b->i[i]; 2532 aworkA = a->a + a->i[i]; 2533 aworkB = b->a + b->i[i]; 2534 lwrite = 0; 2535 for (l=0; l<nzB; l++) { 2536 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2537 } 2538 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2539 for (l=0; l<nzB; l++) { 2540 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2541 } 2542 vals += ncols; 2543 rptr[i+1] = rptr[i] + ncols; 2544 } 2545 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2546 2547 /* send nzlocal to others, and recv other's nzlocal */ 2548 /*--------------------------------------------------*/ 2549 if (reuse == MAT_INITIAL_MATRIX) { 2550 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2551 2552 s_waits2 = s_waits3 + nsends; 2553 s_waits1 = s_waits2 + nsends; 2554 r_waits1 = s_waits1 + nsends; 2555 r_waits2 = r_waits1 + nrecvs; 2556 r_waits3 = r_waits2 + nrecvs; 2557 } else { 2558 ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2559 2560 r_waits3 = s_waits3 + nsends; 2561 } 2562 2563 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2564 if (reuse == MAT_INITIAL_MATRIX) { 2565 /* get new tags to keep the communication clean */ 2566 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2567 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2568 ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr); 2569 2570 /* post receives of other's nzlocal */ 2571 for (i=0; i<nrecvs; i++) { 2572 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2573 } 2574 /* send nzlocal to others */ 2575 for (i=0; i<nsends; i++) { 2576 sbuf_nz[i] = nzlocal; 2577 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2578 } 2579 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2580 count = nrecvs; 2581 while (count) { 2582 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2583 2584 recv_rank[imdex] = recv_status.MPI_SOURCE; 2585 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2586 ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr); 2587 2588 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2589 2590 rbuf_nz[imdex] += i + 2; 2591 2592 ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr); 2593 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2594 count--; 2595 } 2596 /* wait on sends of nzlocal */ 2597 if (nsends) {ierr = 
MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2598 /* send mat->i,j to others, and recv from other's */ 2599 /*------------------------------------------------*/ 2600 for (i=0; i<nsends; i++) { 2601 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2602 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2603 } 2604 /* wait on receives of mat->i,j */ 2605 /*------------------------------*/ 2606 count = nrecvs; 2607 while (count) { 2608 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2609 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2610 count--; 2611 } 2612 /* wait on sends of mat->i,j */ 2613 /*---------------------------*/ 2614 if (nsends) { 2615 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2616 } 2617 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2618 2619 /* post receives, send and receive mat->a */ 2620 /*----------------------------------------*/ 2621 for (imdex=0; imdex<nrecvs; imdex++) { 2622 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2623 } 2624 for (i=0; i<nsends; i++) { 2625 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2626 } 2627 count = nrecvs; 2628 while (count) { 2629 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2630 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2631 count--; 2632 } 2633 if (nsends) { 2634 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2635 } 2636 2637 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2638 2639 /* create redundant matrix */ 2640 /*-------------------------*/ 2641 if (reuse == MAT_INITIAL_MATRIX) { 2642 const PetscInt *range; 2643 PetscInt rstart_sub,rend_sub,mloc_sub; 2644 2645 /* compute rownz_max for preallocation */ 2646 for (imdex=0; imdex<nrecvs; imdex++) { 2647 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2648 rptr = rbuf_j[imdex]; 2649 for (i=0; i<j; i++) { 2650 ncols = rptr[i+1] - rptr[i]; 2651 if (rownz_max < ncols) rownz_max = ncols; 2652 } 2653 } 2654 2655 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2656 2657 /* get local size of redundant matrix 2658 - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! 
*/ 2659 ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr); 2660 rstart_sub = range[nsubcomm*subrank]; 2661 if (subrank+1 < subsize) { /* not the last proc in subcomm */ 2662 rend_sub = range[nsubcomm*(subrank+1)]; 2663 } else { 2664 rend_sub = mat->rmap->N; 2665 } 2666 mloc_sub = rend_sub - rstart_sub; 2667 2668 if (M == N) { 2669 ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2670 } else { /* non-square matrix */ 2671 ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr); 2672 } 2673 ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr); 2674 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2675 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2676 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2677 } else { 2678 C = *matredundant; 2679 } 2680 2681 /* insert local matrix entries */ 2682 rptr = sbuf_j; 2683 cols = sbuf_j + rend-rstart + 1; 2684 vals = sbuf_a; 2685 for (i=0; i<rend-rstart; i++) { 2686 row = i + rstart; 2687 ncols = rptr[i+1] - rptr[i]; 2688 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2689 vals += ncols; 2690 cols += ncols; 2691 } 2692 /* insert received matrix entries */ 2693 for (imdex=0; imdex<nrecvs; imdex++) { 2694 rstart = rowrange[recv_rank[imdex]]; 2695 rend = rowrange[recv_rank[imdex]+1]; 2696 /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */ 2697 rptr = rbuf_j[imdex]; 2698 cols = rbuf_j[imdex] + rend-rstart + 1; 2699 vals = rbuf_a[imdex]; 2700 for (i=0; i<rend-rstart; i++) { 2701 row = i + rstart; 2702 ncols = rptr[i+1] - rptr[i]; 2703 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2704 vals += ncols; 2705 cols += ncols; 2706 } 2707 } 2708 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2709 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2710 2711 if (reuse == MAT_INITIAL_MATRIX) { 2712 *matredundant = C; 2713 2714 /* create a supporting struct and attach it to C for reuse */ 2715 ierr = PetscNewLog(C,&redund);CHKERRQ(ierr); 2716 if (subsize == 1) { 2717 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2718 c->redundant = redund; 2719 } else { 2720 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2721 c->redundant = redund; 2722 } 2723 2724 redund->nzlocal = nzlocal; 2725 redund->nsends = nsends; 2726 redund->nrecvs = nrecvs; 2727 redund->send_rank = send_rank; 2728 redund->recv_rank = recv_rank; 2729 redund->sbuf_nz = sbuf_nz; 2730 redund->rbuf_nz = rbuf_nz; 2731 redund->sbuf_j = sbuf_j; 2732 redund->sbuf_a = sbuf_a; 2733 redund->rbuf_j = rbuf_j; 2734 redund->rbuf_a = rbuf_a; 2735 redund->psubcomm = NULL; 2736 } 2737 PetscFunctionReturn(0); 2738 } 2739 2740 #undef __FUNCT__ 2741 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2742 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2743 { 2744 PetscErrorCode ierr; 2745 MPI_Comm comm; 2746 PetscMPIInt size,subsize; 2747 PetscInt mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N; 2748 Mat_Redundant *redund=NULL; 2749 PetscSubcomm psubcomm=NULL; 2750 MPI_Comm subcomm_in=subcomm; 2751 Mat *matseq; 2752 IS isrow,iscol; 2753 2754 PetscFunctionBegin; 2755 if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */ 2756 if (reuse == MAT_INITIAL_MATRIX) { 2757 /* create psubcomm, then get subcomm */ 2758 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2759 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2760 if (nsubcomm < 1 || nsubcomm 
> size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must between 1 and %D",size); 2761 2762 ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr); 2763 ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr); 2764 ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr); 2765 ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr); 2766 subcomm = psubcomm->comm; 2767 } else { /* retrieve psubcomm and subcomm */ 2768 ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr); 2769 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2770 if (subsize == 1) { 2771 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2772 redund = c->redundant; 2773 } else { 2774 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2775 redund = c->redundant; 2776 } 2777 psubcomm = redund->psubcomm; 2778 } 2779 if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) { 2780 ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr); 2781 if (reuse == MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_Redundant() */ 2782 ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr); 2783 if (subsize == 1) { 2784 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2785 c->redundant->psubcomm = psubcomm; 2786 } else { 2787 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2788 c->redundant->psubcomm = psubcomm ; 2789 } 2790 } 2791 PetscFunctionReturn(0); 2792 } 2793 } 2794 2795 /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */ 2796 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2797 if (reuse == MAT_INITIAL_MATRIX) { 2798 /* create a local sequential matrix matseq[0] */ 2799 mloc_sub = PETSC_DECIDE; 2800 ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr); 2801 ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr); 2802 rstart = rend - mloc_sub; 2803 ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr); 2804 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr); 2805 } else { /* reuse == MAT_REUSE_MATRIX */ 2806 if (subsize == 1) { 2807 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2808 redund = c->redundant; 2809 } else { 2810 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2811 redund = c->redundant; 2812 } 2813 2814 isrow = redund->isrow; 2815 iscol = redund->iscol; 2816 matseq = redund->matseq; 2817 } 2818 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr); 2819 ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr); 2820 2821 if (reuse == MAT_INITIAL_MATRIX) { 2822 /* create a supporting struct and attach it to C for reuse */ 2823 ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr); 2824 if (subsize == 1) { 2825 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2826 c->redundant = redund; 2827 } else { 2828 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2829 c->redundant = redund; 2830 } 2831 redund->isrow = isrow; 2832 redund->iscol = iscol; 2833 redund->matseq = matseq; 2834 redund->psubcomm = psubcomm; 2835 } 2836 PetscFunctionReturn(0); 2837 } 2838 2839 #undef __FUNCT__ 2840 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2841 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2842 { 2843 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2844 PetscErrorCode ierr; 2845 PetscInt i,*idxb = 0; 2846 PetscScalar *va,*vb; 2847 Vec vtmp; 2848 2849 
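/* Compute, for each local row, the entry of maximum absolute value: take the row maxima of the
   diagonal block a->A and the off-diagonal block a->B separately, then merge the two results;
   column indices coming from a->B are translated back to global numbering through a->garray. */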
PetscFunctionBegin; 2850 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2851 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2852 if (idx) { 2853 for (i=0; i<A->rmap->n; i++) { 2854 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2855 } 2856 } 2857 2858 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2859 if (idx) { 2860 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2861 } 2862 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2863 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2864 2865 for (i=0; i<A->rmap->n; i++) { 2866 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2867 va[i] = vb[i]; 2868 if (idx) idx[i] = a->garray[idxb[i]]; 2869 } 2870 } 2871 2872 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2873 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2874 ierr = PetscFree(idxb);CHKERRQ(ierr); 2875 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2876 PetscFunctionReturn(0); 2877 } 2878 2879 #undef __FUNCT__ 2880 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2881 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2882 { 2883 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2884 PetscErrorCode ierr; 2885 PetscInt i,*idxb = 0; 2886 PetscScalar *va,*vb; 2887 Vec vtmp; 2888 2889 PetscFunctionBegin; 2890 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2891 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2892 if (idx) { 2893 for (i=0; i<A->cmap->n; i++) { 2894 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2895 } 2896 } 2897 2898 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2899 if (idx) { 2900 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2901 } 2902 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2903 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2904 2905 for (i=0; i<A->rmap->n; i++) { 2906 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2907 va[i] = vb[i]; 2908 if (idx) idx[i] = a->garray[idxb[i]]; 2909 } 2910 } 2911 2912 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2913 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2914 ierr = PetscFree(idxb);CHKERRQ(ierr); 2915 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2916 PetscFunctionReturn(0); 2917 } 2918 2919 #undef __FUNCT__ 2920 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2921 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2922 { 2923 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2924 PetscInt n = A->rmap->n; 2925 PetscInt cstart = A->cmap->rstart; 2926 PetscInt *cmap = mat->garray; 2927 PetscInt *diagIdx, *offdiagIdx; 2928 Vec diagV, offdiagV; 2929 PetscScalar *a, *diagA, *offdiagA; 2930 PetscInt r; 2931 PetscErrorCode ierr; 2932 2933 PetscFunctionBegin; 2934 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2935 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2936 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2937 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2938 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2939 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2940 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2941 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2942 for (r = 0; r < n; ++r) { 2943 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2944 a[r] = diagA[r]; 2945 idx[r] = cstart + diagIdx[r]; 2946 } else { 2947 a[r] = offdiagA[r]; 2948 idx[r] = cmap[offdiagIdx[r]]; 2949 } 2950 } 2951 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2952 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2953 ierr = VecRestoreArray(offdiagV, 
&offdiagA);CHKERRQ(ierr); 2954 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2955 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2956 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2957 PetscFunctionReturn(0); 2958 } 2959 2960 #undef __FUNCT__ 2961 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2962 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2963 { 2964 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2965 PetscInt n = A->rmap->n; 2966 PetscInt cstart = A->cmap->rstart; 2967 PetscInt *cmap = mat->garray; 2968 PetscInt *diagIdx, *offdiagIdx; 2969 Vec diagV, offdiagV; 2970 PetscScalar *a, *diagA, *offdiagA; 2971 PetscInt r; 2972 PetscErrorCode ierr; 2973 2974 PetscFunctionBegin; 2975 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2976 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2977 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2978 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2979 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2980 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2981 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2982 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2983 for (r = 0; r < n; ++r) { 2984 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2985 a[r] = diagA[r]; 2986 idx[r] = cstart + diagIdx[r]; 2987 } else { 2988 a[r] = offdiagA[r]; 2989 idx[r] = cmap[offdiagIdx[r]]; 2990 } 2991 } 2992 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2993 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2994 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2995 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2996 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2997 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2998 PetscFunctionReturn(0); 2999 } 3000 3001 #undef __FUNCT__ 3002 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3003 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3004 { 3005 PetscErrorCode ierr; 3006 Mat *dummy; 3007 3008 PetscFunctionBegin; 3009 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3010 *newmat = *dummy; 3011 ierr = PetscFree(dummy);CHKERRQ(ierr); 3012 PetscFunctionReturn(0); 3013 } 3014 3015 #undef __FUNCT__ 3016 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3017 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3018 { 3019 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3020 PetscErrorCode ierr; 3021 3022 PetscFunctionBegin; 3023 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3024 PetscFunctionReturn(0); 3025 } 3026 3027 #undef __FUNCT__ 3028 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3029 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3030 { 3031 PetscErrorCode ierr; 3032 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3033 3034 PetscFunctionBegin; 3035 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3036 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3037 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3038 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3039 PetscFunctionReturn(0); 3040 } 3041 3042 /* -------------------------------------------------------------------*/ 3043 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3044 MatGetRow_MPIAIJ, 3045 MatRestoreRow_MPIAIJ, 3046 MatMult_MPIAIJ, 3047 /* 4*/ MatMultAdd_MPIAIJ, 3048 MatMultTranspose_MPIAIJ, 3049 MatMultTransposeAdd_MPIAIJ, 3050 #if defined(PETSC_HAVE_PBGL) 3051 MatSolve_MPIAIJ, 3052 #else 3053 0, 3054 #endif 3055 0, 3056 
0, 3057 /*10*/ 0, 3058 0, 3059 0, 3060 MatSOR_MPIAIJ, 3061 MatTranspose_MPIAIJ, 3062 /*15*/ MatGetInfo_MPIAIJ, 3063 MatEqual_MPIAIJ, 3064 MatGetDiagonal_MPIAIJ, 3065 MatDiagonalScale_MPIAIJ, 3066 MatNorm_MPIAIJ, 3067 /*20*/ MatAssemblyBegin_MPIAIJ, 3068 MatAssemblyEnd_MPIAIJ, 3069 MatSetOption_MPIAIJ, 3070 MatZeroEntries_MPIAIJ, 3071 /*24*/ MatZeroRows_MPIAIJ, 3072 0, 3073 #if defined(PETSC_HAVE_PBGL) 3074 0, 3075 #else 3076 0, 3077 #endif 3078 0, 3079 0, 3080 /*29*/ MatSetUp_MPIAIJ, 3081 #if defined(PETSC_HAVE_PBGL) 3082 0, 3083 #else 3084 0, 3085 #endif 3086 0, 3087 0, 3088 0, 3089 /*34*/ MatDuplicate_MPIAIJ, 3090 0, 3091 0, 3092 0, 3093 0, 3094 /*39*/ MatAXPY_MPIAIJ, 3095 MatGetSubMatrices_MPIAIJ, 3096 MatIncreaseOverlap_MPIAIJ, 3097 MatGetValues_MPIAIJ, 3098 MatCopy_MPIAIJ, 3099 /*44*/ MatGetRowMax_MPIAIJ, 3100 MatScale_MPIAIJ, 3101 0, 3102 MatDiagonalSet_MPIAIJ, 3103 MatZeroRowsColumns_MPIAIJ, 3104 /*49*/ MatSetRandom_MPIAIJ, 3105 0, 3106 0, 3107 0, 3108 0, 3109 /*54*/ MatFDColoringCreate_MPIXAIJ, 3110 0, 3111 MatSetUnfactored_MPIAIJ, 3112 MatPermute_MPIAIJ, 3113 0, 3114 /*59*/ MatGetSubMatrix_MPIAIJ, 3115 MatDestroy_MPIAIJ, 3116 MatView_MPIAIJ, 3117 0, 3118 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3119 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3120 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3121 0, 3122 0, 3123 0, 3124 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3125 MatGetRowMinAbs_MPIAIJ, 3126 0, 3127 MatSetColoring_MPIAIJ, 3128 0, 3129 MatSetValuesAdifor_MPIAIJ, 3130 /*75*/ MatFDColoringApply_AIJ, 3131 0, 3132 0, 3133 0, 3134 MatFindZeroDiagonals_MPIAIJ, 3135 /*80*/ 0, 3136 0, 3137 0, 3138 /*83*/ MatLoad_MPIAIJ, 3139 0, 3140 0, 3141 0, 3142 0, 3143 0, 3144 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3145 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3146 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3147 MatPtAP_MPIAIJ_MPIAIJ, 3148 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3149 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3150 0, 3151 0, 3152 0, 3153 0, 3154 /*99*/ 0, 3155 0, 3156 0, 3157 MatConjugate_MPIAIJ, 3158 0, 3159 /*104*/MatSetValuesRow_MPIAIJ, 3160 MatRealPart_MPIAIJ, 3161 MatImaginaryPart_MPIAIJ, 3162 0, 3163 0, 3164 /*109*/0, 3165 MatGetRedundantMatrix_MPIAIJ, 3166 MatGetRowMin_MPIAIJ, 3167 0, 3168 0, 3169 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3170 0, 3171 0, 3172 0, 3173 0, 3174 /*119*/0, 3175 0, 3176 0, 3177 0, 3178 MatGetMultiProcBlock_MPIAIJ, 3179 /*124*/MatFindNonzeroRows_MPIAIJ, 3180 MatGetColumnNorms_MPIAIJ, 3181 MatInvertBlockDiagonal_MPIAIJ, 3182 0, 3183 MatGetSubMatricesParallel_MPIAIJ, 3184 /*129*/0, 3185 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3186 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3187 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3188 0, 3189 /*134*/0, 3190 0, 3191 0, 3192 0, 3193 0, 3194 /*139*/0, 3195 0, 3196 0, 3197 MatFDColoringSetUp_MPIXAIJ 3198 }; 3199 3200 /* ----------------------------------------------------------------------------------------*/ 3201 3202 #undef __FUNCT__ 3203 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3204 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3205 { 3206 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3207 PetscErrorCode ierr; 3208 3209 PetscFunctionBegin; 3210 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3211 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3212 PetscFunctionReturn(0); 3213 } 3214 3215 #undef __FUNCT__ 3216 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3217 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3218 { 3219 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3220 PetscErrorCode ierr; 3221 3222 PetscFunctionBegin; 3223 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3224 
ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3225 PetscFunctionReturn(0); 3226 } 3227 3228 #undef __FUNCT__ 3229 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3230 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3231 { 3232 Mat_MPIAIJ *b; 3233 PetscErrorCode ierr; 3234 3235 PetscFunctionBegin; 3236 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3237 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3238 b = (Mat_MPIAIJ*)B->data; 3239 3240 if (!B->preallocated) { 3241 /* Explicitly create 2 MATSEQAIJ matrices. */ 3242 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3243 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3244 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 3245 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3246 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 3247 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3248 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3249 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 3250 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3251 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 3252 } 3253 3254 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3255 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3256 B->preallocated = PETSC_TRUE; 3257 PetscFunctionReturn(0); 3258 } 3259 3260 #undef __FUNCT__ 3261 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3262 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3263 { 3264 Mat mat; 3265 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3266 PetscErrorCode ierr; 3267 3268 PetscFunctionBegin; 3269 *newmat = 0; 3270 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3271 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3272 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 3273 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3274 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3275 a = (Mat_MPIAIJ*)mat->data; 3276 3277 mat->factortype = matin->factortype; 3278 mat->assembled = PETSC_TRUE; 3279 mat->insertmode = NOT_SET_VALUES; 3280 mat->preallocated = PETSC_TRUE; 3281 3282 a->size = oldmat->size; 3283 a->rank = oldmat->rank; 3284 a->donotstash = oldmat->donotstash; 3285 a->roworiented = oldmat->roworiented; 3286 a->rowindices = 0; 3287 a->rowvalues = 0; 3288 a->getrowactive = PETSC_FALSE; 3289 3290 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3291 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3292 3293 if (oldmat->colmap) { 3294 #if defined(PETSC_USE_CTABLE) 3295 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3296 #else 3297 ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr); 3298 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3299 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3300 #endif 3301 } else a->colmap = 0; 3302 if (oldmat->garray) { 3303 PetscInt len; 3304 len = oldmat->B->cmap->n; 3305 ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr); 3306 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3307 if (len) { ierr = 
PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3308 } else a->garray = 0; 3309 3310 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3311 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3312 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3313 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3314 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3315 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3316 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3317 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3318 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3319 *newmat = mat; 3320 PetscFunctionReturn(0); 3321 } 3322 3323 3324 3325 #undef __FUNCT__ 3326 #define __FUNCT__ "MatLoad_MPIAIJ" 3327 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3328 { 3329 PetscScalar *vals,*svals; 3330 MPI_Comm comm; 3331 PetscErrorCode ierr; 3332 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3333 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3334 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3335 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3336 PetscInt cend,cstart,n,*rowners,sizesset=1; 3337 int fd; 3338 PetscInt bs = 1; 3339 3340 PetscFunctionBegin; 3341 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3342 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3343 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3344 if (!rank) { 3345 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3346 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3347 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3348 } 3349 3350 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3351 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3352 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3353 3354 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3355 3356 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3357 M = header[1]; N = header[2]; 3358 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3359 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3360 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3361 3362 /* If global sizes are set, check if they are consistent with that given in the file */ 3363 if (sizesset) { 3364 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3365 } 3366 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3367 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3368 3369 /* determine ownership of all (block) rows */ 3370 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3371 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3372 else m = newMat->rmap->n; /* Set by user */ 3373 3374 
ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 3375 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3376 3377 /* First process needs enough room for process with most rows */ 3378 if (!rank) { 3379 mmax = rowners[1]; 3380 for (i=2; i<=size; i++) { 3381 mmax = PetscMax(mmax, rowners[i]); 3382 } 3383 } else mmax = -1; /* unused, but compilers complain */ 3384 3385 rowners[0] = 0; 3386 for (i=2; i<=size; i++) { 3387 rowners[i] += rowners[i-1]; 3388 } 3389 rstart = rowners[rank]; 3390 rend = rowners[rank+1]; 3391 3392 /* distribute row lengths to all processors */ 3393 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 3394 if (!rank) { 3395 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3396 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3397 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3398 for (j=0; j<m; j++) { 3399 procsnz[0] += ourlens[j]; 3400 } 3401 for (i=1; i<size; i++) { 3402 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3403 /* calculate the number of nonzeros on each processor */ 3404 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3405 procsnz[i] += rowlengths[j]; 3406 } 3407 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3408 } 3409 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3410 } else { 3411 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3412 } 3413 3414 if (!rank) { 3415 /* determine max buffer needed and allocate it */ 3416 maxnz = 0; 3417 for (i=0; i<size; i++) { 3418 maxnz = PetscMax(maxnz,procsnz[i]); 3419 } 3420 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3421 3422 /* read in my part of the matrix column indices */ 3423 nz = procsnz[0]; 3424 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3425 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3426 3427 /* read in every one elses and ship off */ 3428 for (i=1; i<size; i++) { 3429 nz = procsnz[i]; 3430 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3431 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3432 } 3433 ierr = PetscFree(cols);CHKERRQ(ierr); 3434 } else { 3435 /* determine buffer space needed for message */ 3436 nz = 0; 3437 for (i=0; i<m; i++) { 3438 nz += ourlens[i]; 3439 } 3440 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3441 3442 /* receive message of column indices*/ 3443 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3444 } 3445 3446 /* determine column ownership if matrix is not square */ 3447 if (N != M) { 3448 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3449 else n = newMat->cmap->n; 3450 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3451 cstart = cend - n; 3452 } else { 3453 cstart = rstart; 3454 cend = rend; 3455 n = cend - cstart; 3456 } 3457 3458 /* loop over local rows, determining number of off diagonal entries */ 3459 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3460 jj = 0; 3461 for (i=0; i<m; i++) { 3462 for (j=0; j<ourlens[i]; j++) { 3463 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3464 jj++; 3465 } 3466 } 3467 3468 for (i=0; i<m; i++) { 3469 ourlens[i] -= offlens[i]; 3470 } 3471 if (!sizesset) { 3472 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3473 } 3474 3475 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3476 3477 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3478 3479 for (i=0; i<m; i++) { 3480 ourlens[i] += offlens[i]; 3481 } 3482 3483 if 
(!rank) { 3484 ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr); 3485 3486 /* read in my part of the matrix numerical values */ 3487 nz = procsnz[0]; 3488 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3489 3490 /* insert into matrix */ 3491 jj = rstart; 3492 smycols = mycols; 3493 svals = vals; 3494 for (i=0; i<m; i++) { 3495 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3496 smycols += ourlens[i]; 3497 svals += ourlens[i]; 3498 jj++; 3499 } 3500 3501 /* read in other processors and ship out */ 3502 for (i=1; i<size; i++) { 3503 nz = procsnz[i]; 3504 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3505 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3506 } 3507 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3508 } else { 3509 /* receive numeric values */ 3510 ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr); 3511 3512 /* receive message of values*/ 3513 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3514 3515 /* insert into matrix */ 3516 jj = rstart; 3517 smycols = mycols; 3518 svals = vals; 3519 for (i=0; i<m; i++) { 3520 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3521 smycols += ourlens[i]; 3522 svals += ourlens[i]; 3523 jj++; 3524 } 3525 } 3526 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3527 ierr = PetscFree(vals);CHKERRQ(ierr); 3528 ierr = PetscFree(mycols);CHKERRQ(ierr); 3529 ierr = PetscFree(rowners);CHKERRQ(ierr); 3530 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3531 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3532 PetscFunctionReturn(0); 3533 } 3534 3535 #undef __FUNCT__ 3536 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3537 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3538 { 3539 PetscErrorCode ierr; 3540 IS iscol_local; 3541 PetscInt csize; 3542 3543 PetscFunctionBegin; 3544 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3545 if (call == MAT_REUSE_MATRIX) { 3546 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3547 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3548 } else { 3549 PetscInt cbs; 3550 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3551 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3552 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3553 } 3554 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3555 if (call == MAT_INITIAL_MATRIX) { 3556 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3557 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3558 } 3559 PetscFunctionReturn(0); 3560 } 3561 3562 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3563 #undef __FUNCT__ 3564 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3565 /* 3566 Not great since it makes two copies of the submatrix, first an SeqAIJ 3567 in local and then by concatenating the local matrices the end result. 3568 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3569 3570 Note: This requires a sequential iscol with all indices. 
3571 */ 3572 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3573 { 3574 PetscErrorCode ierr; 3575 PetscMPIInt rank,size; 3576 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3577 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3578 PetscBool allcolumns, colflag; 3579 Mat M,Mreuse; 3580 MatScalar *vwork,*aa; 3581 MPI_Comm comm; 3582 Mat_SeqAIJ *aij; 3583 3584 PetscFunctionBegin; 3585 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3586 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3587 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3588 3589 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3590 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3591 if (colflag && ncol == mat->cmap->N) { 3592 allcolumns = PETSC_TRUE; 3593 } else { 3594 allcolumns = PETSC_FALSE; 3595 } 3596 if (call == MAT_REUSE_MATRIX) { 3597 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3598 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3599 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3600 } else { 3601 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3602 } 3603 3604 /* 3605 m - number of local rows 3606 n - number of columns (same on all processors) 3607 rstart - first row in new global matrix generated 3608 */ 3609 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3610 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3611 if (call == MAT_INITIAL_MATRIX) { 3612 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3613 ii = aij->i; 3614 jj = aij->j; 3615 3616 /* 3617 Determine the number of non-zeros in the diagonal and off-diagonal 3618 portions of the matrix in order to do correct preallocation 3619 */ 3620 3621 /* first get start and end of "diagonal" columns */ 3622 if (csize == PETSC_DECIDE) { 3623 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3624 if (mglobal == n) { /* square matrix */ 3625 nlocal = m; 3626 } else { 3627 nlocal = n/size + ((n % size) > rank); 3628 } 3629 } else { 3630 nlocal = csize; 3631 } 3632 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3633 rstart = rend - nlocal; 3634 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3635 3636 /* next, compute all the lengths */ 3637 ierr = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr); 3638 olens = dlens + m; 3639 for (i=0; i<m; i++) { 3640 jend = ii[i+1] - ii[i]; 3641 olen = 0; 3642 dlen = 0; 3643 for (j=0; j<jend; j++) { 3644 if (*jj < rstart || *jj >= rend) olen++; 3645 else dlen++; 3646 jj++; 3647 } 3648 olens[i] = olen; 3649 dlens[i] = dlen; 3650 } 3651 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3652 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3653 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3654 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3655 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3656 ierr = PetscFree(dlens);CHKERRQ(ierr); 3657 } else { 3658 PetscInt ml,nl; 3659 3660 M = *newmat; 3661 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3662 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3663 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3664 /* 3665 The next 
two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3666 rather than the slower MatSetValues(). 3667 */ 3668 M->was_assembled = PETSC_TRUE; 3669 M->assembled = PETSC_FALSE; 3670 } 3671 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3672 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3673 ii = aij->i; 3674 jj = aij->j; 3675 aa = aij->a; 3676 for (i=0; i<m; i++) { 3677 row = rstart + i; 3678 nz = ii[i+1] - ii[i]; 3679 cwork = jj; jj += nz; 3680 vwork = aa; aa += nz; 3681 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3682 } 3683 3684 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3685 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3686 *newmat = M; 3687 3688 /* save submatrix used in processor for next request */ 3689 if (call == MAT_INITIAL_MATRIX) { 3690 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3691 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3692 } 3693 PetscFunctionReturn(0); 3694 } 3695 3696 #undef __FUNCT__ 3697 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3698 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3699 { 3700 PetscInt m,cstart, cend,j,nnz,i,d; 3701 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3702 const PetscInt *JJ; 3703 PetscScalar *values; 3704 PetscErrorCode ierr; 3705 3706 PetscFunctionBegin; 3707 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3708 3709 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3710 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3711 m = B->rmap->n; 3712 cstart = B->cmap->rstart; 3713 cend = B->cmap->rend; 3714 rstart = B->rmap->rstart; 3715 3716 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3717 3718 #if defined(PETSC_USE_DEBUGGING) 3719 for (i=0; i<m; i++) { 3720 nnz = Ii[i+1]- Ii[i]; 3721 JJ = J + Ii[i]; 3722 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3723 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3724 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3725 } 3726 #endif 3727 3728 for (i=0; i<m; i++) { 3729 nnz = Ii[i+1]- Ii[i]; 3730 JJ = J + Ii[i]; 3731 nnz_max = PetscMax(nnz_max,nnz); 3732 d = 0; 3733 for (j=0; j<nnz; j++) { 3734 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3735 } 3736 d_nnz[i] = d; 3737 o_nnz[i] = nnz - d; 3738 } 3739 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3740 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3741 3742 if (v) values = (PetscScalar*)v; 3743 else { 3744 ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr); 3745 } 3746 3747 for (i=0; i<m; i++) { 3748 ii = i + rstart; 3749 nnz = Ii[i+1]- Ii[i]; 3750 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? 
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (!v) {
    ierr = PetscFree(values);CHKERRQ(ierr);
  }
  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
/*@
   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero)
-  v - optional values in the matrix

   Level: developer

   Notes:
       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of v[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i indices are offsets into the local j array.

       The format used for the sparse matrix input is equivalent to a
     row-major ordering, i.e., for the following matrix, the input data expected is
     as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i = {0,1,3}  [size = nrow+1 = 2+1]
        j = {0,0,2}  [size = nz = 3]
        v = {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i = {0,3}    [size = nrow+1 = 1+1]
        j = {0,1,2}  [size = nz = 3]
        v = {4,5,6}  [size = nz = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
          MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
           For matrices that will be factored, you must leave room for (and set)
           the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)) is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.
   See Users-Manual: ch_mat for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to. This is an m x n matrix. In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was,
   for example the fields mallocs, nz_allocated, nz_used, nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithArrays"
/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain, in standard
   CSR format, the locally owned rows.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i indices are offsets into the local j array.
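       As an illustrative sketch only (the array contents below are taken from
     process 0 of the two-process example that follows and are not an additional
     requirement of this routine), a typical call might look like:

.vb
     PetscErrorCode ierr;
     PetscInt       i[] = {0,1,3};        /* row offsets for the 2 locally owned rows */
     PetscInt       j[] = {0,0,2};        /* column indices of the local nonzeros */
     PetscScalar    a[] = {1.0,2.0,3.0};  /* values of the local nonzeros */
     Mat            A;

     ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,a,&A);CHKERRQ(ierr);
.ve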

       The format used for the sparse matrix input is equivalent to a
     row-major ordering, i.e., for the following matrix, the input data expected is
     as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i = {0,1,3}  [size = nrow+1 = 2+1]
        j = {0,0,2}  [size = nz = 3]
        v = {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i = {0,3}    [size = nrow+1 = 1+1]
        j = {0,1,2}  [size = nz = 3]
        v = {4,5,6}  [size = nz = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateAIJ"
/*@C
   MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL, if o_nz is used to specify the nonzero
           structure.
           The size of this array is equal to the number
           of local rows, i.e. 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2,... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor, i.e. the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism:
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!


   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows.
   This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.
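   As a minimal sketch of a typical call sequence (illustrative only; the local
   row count and the d_nnz/o_nnz values below are placeholder assumptions and are
   not tied to the 8x8 example above):

.vb
     PetscErrorCode ierr;
     Mat            A;
     PetscInt       m        = 3;            /* locally owned rows on this process */
     PetscInt       d_nnz[3] = {2,2,2};      /* per-row nonzeros, DIAGONAL block   */
     PetscInt       o_nnz[3] = {2,2,2};      /* per-row nonzeros, OFF-DIAGONAL block */

     ierr = MatCreateAIJ(PETSC_COMM_WORLD,m,m,PETSC_DETERMINE,PETSC_DETERMINE,
                         0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
     /* ... insert entries with MatSetValues(), then assemble ... */
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve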
4199 4200 Level: intermediate 4201 4202 .keywords: matrix, aij, compressed row, sparse, parallel 4203 4204 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4205 MPIAIJ, MatCreateMPIAIJWithArrays() 4206 @*/ 4207 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4208 { 4209 PetscErrorCode ierr; 4210 PetscMPIInt size; 4211 4212 PetscFunctionBegin; 4213 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4214 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4215 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4216 if (size > 1) { 4217 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4218 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4219 } else { 4220 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4221 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4222 } 4223 PetscFunctionReturn(0); 4224 } 4225 4226 #undef __FUNCT__ 4227 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4228 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4229 { 4230 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4231 4232 PetscFunctionBegin; 4233 if (Ad) *Ad = a->A; 4234 if (Ao) *Ao = a->B; 4235 if (colmap) *colmap = a->garray; 4236 PetscFunctionReturn(0); 4237 } 4238 4239 #undef __FUNCT__ 4240 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4241 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4242 { 4243 PetscErrorCode ierr; 4244 PetscInt i; 4245 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4246 4247 PetscFunctionBegin; 4248 if (coloring->ctype == IS_COLORING_GLOBAL) { 4249 ISColoringValue *allcolors,*colors; 4250 ISColoring ocoloring; 4251 4252 /* set coloring for diagonal portion */ 4253 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4254 4255 /* set coloring for off-diagonal portion */ 4256 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4257 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4258 for (i=0; i<a->B->cmap->n; i++) { 4259 colors[i] = allcolors[a->garray[i]]; 4260 } 4261 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4262 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4263 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4264 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4265 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4266 ISColoringValue *colors; 4267 PetscInt *larray; 4268 ISColoring ocoloring; 4269 4270 /* set coloring for diagonal portion */ 4271 ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr); 4272 for (i=0; i<a->A->cmap->n; i++) { 4273 larray[i] = i + A->cmap->rstart; 4274 } 4275 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4276 ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr); 4277 for (i=0; i<a->A->cmap->n; i++) { 4278 colors[i] = coloring->colors[larray[i]]; 4279 } 4280 ierr = PetscFree(larray);CHKERRQ(ierr); 4281 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4282 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4283 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4284 4285 /* set coloring for off-diagonal portion */ 4286 ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr); 4287 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4288 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4289 for (i=0; i<a->B->cmap->n; i++) { 4290 colors[i] = coloring->colors[larray[i]]; 4291 } 4292 ierr = PetscFree(larray);CHKERRQ(ierr); 4293 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4294 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4295 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4296 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4297 PetscFunctionReturn(0); 4298 } 4299 4300 #undef __FUNCT__ 4301 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4302 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4303 { 4304 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4305 PetscErrorCode ierr; 4306 4307 PetscFunctionBegin; 4308 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4309 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4310 PetscFunctionReturn(0); 4311 } 4312 4313 #undef __FUNCT__ 4314 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4315 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4316 { 4317 PetscErrorCode ierr; 4318 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4319 PetscInt *indx; 4320 4321 PetscFunctionBegin; 4322 /* This routine will ONLY return MPIAIJ type matrix */ 4323 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4324 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4325 if (n == PETSC_DECIDE) { 4326 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4327 } 4328 /* Check sum(n) = N */ 4329 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4330 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4331 4332 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4333 rstart -= m; 4334 4335 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4336 for (i=0; i<m; i++) { 4337 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4338 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4339 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4340 } 4341 4342 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4343 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4344 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4345 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4346 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4347 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4348 PetscFunctionReturn(0); 4349 } 4350 4351 #undef __FUNCT__ 4352 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4353 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4354 { 4355 PetscErrorCode ierr; 4356 PetscInt m,N,i,rstart,nnz,Ii; 4357 PetscInt *indx; 4358 PetscScalar *values; 4359 4360 PetscFunctionBegin; 4361 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4362 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4363 for (i=0; i<m; i++) { 4364 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4365 Ii = i + rstart; 4366 ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4367 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4368 } 4369 ierr = 
MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4370 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4371 PetscFunctionReturn(0); 4372 } 4373 4374 #undef __FUNCT__ 4375 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4376 /*@ 4377 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4378 matrices from each processor 4379 4380 Collective on MPI_Comm 4381 4382 Input Parameters: 4383 + comm - the communicators the parallel matrix will live on 4384 . inmat - the input sequential matrices 4385 . n - number of local columns (or PETSC_DECIDE) 4386 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4387 4388 Output Parameter: 4389 . outmat - the parallel matrix generated 4390 4391 Level: advanced 4392 4393 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4394 4395 @*/ 4396 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4397 { 4398 PetscErrorCode ierr; 4399 PetscMPIInt size; 4400 4401 PetscFunctionBegin; 4402 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4403 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4404 if (size == 1) { 4405 if (scall == MAT_INITIAL_MATRIX) { 4406 ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr); 4407 } else { 4408 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4409 } 4410 } else { 4411 if (scall == MAT_INITIAL_MATRIX) { 4412 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4413 } 4414 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4415 } 4416 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4417 PetscFunctionReturn(0); 4418 } 4419 4420 #undef __FUNCT__ 4421 #define __FUNCT__ "MatFileSplit" 4422 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4423 { 4424 PetscErrorCode ierr; 4425 PetscMPIInt rank; 4426 PetscInt m,N,i,rstart,nnz; 4427 size_t len; 4428 const PetscInt *indx; 4429 PetscViewer out; 4430 char *name; 4431 Mat B; 4432 const PetscScalar *values; 4433 4434 PetscFunctionBegin; 4435 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4436 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4437 /* Should this be the type of the diagonal block of A? 
*/ 4438 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4439 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4440 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4441 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4442 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4443 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4444 for (i=0; i<m; i++) { 4445 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4446 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4447 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4448 } 4449 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4450 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4451 4452 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4453 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4454 ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr); 4455 sprintf(name,"%s.%d",outfile,rank); 4456 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4457 ierr = PetscFree(name);CHKERRQ(ierr); 4458 ierr = MatView(B,out);CHKERRQ(ierr); 4459 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4460 ierr = MatDestroy(&B);CHKERRQ(ierr); 4461 PetscFunctionReturn(0); 4462 } 4463 4464 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4465 #undef __FUNCT__ 4466 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4467 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4468 { 4469 PetscErrorCode ierr; 4470 Mat_Merge_SeqsToMPI *merge; 4471 PetscContainer container; 4472 4473 PetscFunctionBegin; 4474 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4475 if (container) { 4476 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4477 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4478 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4479 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4480 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4481 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4482 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4483 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4484 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4485 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4486 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4487 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4488 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4489 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4490 ierr = PetscFree(merge);CHKERRQ(ierr); 4491 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4492 } 4493 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4494 PetscFunctionReturn(0); 4495 } 4496 4497 #include <../src/mat/utils/freespace.h> 4498 #include <petscbt.h> 4499 4500 #undef __FUNCT__ 4501 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4502 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4503 { 4504 PetscErrorCode ierr; 4505 MPI_Comm comm; 4506 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4507 PetscMPIInt size,rank,taga,*len_s; 4508 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4509 PetscInt proc,m; 4510 PetscInt **buf_ri,**buf_rj; 4511 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4512 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4513 MPI_Request *s_waits,*r_waits; 4514 MPI_Status *status; 4515 MatScalar *aa=a->a; 4516 MatScalar **abuf_r,*ba_i; 4517 Mat_Merge_SeqsToMPI *merge; 4518 PetscContainer container; 4519 4520 PetscFunctionBegin; 4521 ierr = 
PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4522 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4523 4524 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4525 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4526 4527 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4528 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4529 4530 bi = merge->bi; 4531 bj = merge->bj; 4532 buf_ri = merge->buf_ri; 4533 buf_rj = merge->buf_rj; 4534 4535 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4536 owners = merge->rowmap->range; 4537 len_s = merge->len_s; 4538 4539 /* send and recv matrix values */ 4540 /*-----------------------------*/ 4541 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4542 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4543 4544 ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr); 4545 for (proc=0,k=0; proc<size; proc++) { 4546 if (!len_s[proc]) continue; 4547 i = owners[proc]; 4548 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4549 k++; 4550 } 4551 4552 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4553 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4554 ierr = PetscFree(status);CHKERRQ(ierr); 4555 4556 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4557 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4558 4559 /* insert mat values of mpimat */ 4560 /*----------------------------*/ 4561 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4562 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4563 4564 for (k=0; k<merge->nrecv; k++) { 4565 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4566 nrows = *(buf_ri_k[k]); 4567 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4568 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4569 } 4570 4571 /* set values of ba */ 4572 m = merge->rowmap->n; 4573 for (i=0; i<m; i++) { 4574 arow = owners[rank] + i; 4575 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4576 bnzi = bi[i+1] - bi[i]; 4577 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4578 4579 /* add local non-zero vals of this proc's seqmat into ba */ 4580 anzi = ai[arow+1] - ai[arow]; 4581 aj = a->j + ai[arow]; 4582 aa = a->a + ai[arow]; 4583 nextaj = 0; 4584 for (j=0; nextaj<anzi; j++) { 4585 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4586 ba_i[j] += aa[nextaj++]; 4587 } 4588 } 4589 4590 /* add received vals into ba */ 4591 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4592 /* i-th row */ 4593 if (i == *nextrow[k]) { 4594 anzi = *(nextai[k]+1) - *nextai[k]; 4595 aj = buf_rj[k] + *(nextai[k]); 4596 aa = abuf_r[k] + *(nextai[k]); 4597 nextaj = 0; 4598 for (j=0; nextaj<anzi; j++) { 4599 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4600 ba_i[j] += aa[nextaj++]; 4601 } 4602 } 4603 nextrow[k]++; nextai[k]++; 4604 } 4605 } 4606 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4607 } 4608 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4609 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4610 4611 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4612 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4613 ierr = 
PetscFree(ba_i);CHKERRQ(ierr); 4614 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4615 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4616 PetscFunctionReturn(0); 4617 } 4618 4619 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4620 4621 #undef __FUNCT__ 4622 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4623 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4624 { 4625 PetscErrorCode ierr; 4626 Mat B_mpi; 4627 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4628 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4629 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4630 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4631 PetscInt len,proc,*dnz,*onz,bs,cbs; 4632 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4633 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4634 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4635 MPI_Status *status; 4636 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4637 PetscBT lnkbt; 4638 Mat_Merge_SeqsToMPI *merge; 4639 PetscContainer container; 4640 4641 PetscFunctionBegin; 4642 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4643 4644 /* make sure it is a PETSc comm */ 4645 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4646 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4647 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4648 4649 ierr = PetscNew(&merge);CHKERRQ(ierr); 4650 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4651 4652 /* determine row ownership */ 4653 /*---------------------------------------------------------*/ 4654 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4655 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4656 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4657 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4658 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4659 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4660 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4661 4662 m = merge->rowmap->n; 4663 owners = merge->rowmap->range; 4664 4665 /* determine the number of messages to send, their lengths */ 4666 /*---------------------------------------------------------*/ 4667 len_s = merge->len_s; 4668 4669 len = 0; /* length of buf_si[] */ 4670 merge->nsend = 0; 4671 for (proc=0; proc<size; proc++) { 4672 len_si[proc] = 0; 4673 if (proc == rank) { 4674 len_s[proc] = 0; 4675 } else { 4676 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4677 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4678 } 4679 if (len_s[proc]) { 4680 merge->nsend++; 4681 nrows = 0; 4682 for (i=owners[proc]; i<owners[proc+1]; i++) { 4683 if (ai[i+1] > ai[i]) nrows++; 4684 } 4685 len_si[proc] = 2*(nrows+1); 4686 len += len_si[proc]; 4687 } 4688 } 4689 4690 /* determine the number and length of messages to receive for ij-structure */ 4691 /*-------------------------------------------------------------------------*/ 4692 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4693 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4694 4695 /* post the Irecv of j-structure */ 4696 /*-------------------------------*/ 4697 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4698 ierr = 
PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4699 4700 /* post the Isend of j-structure */ 4701 /*--------------------------------*/ 4702 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4703 4704 for (proc=0, k=0; proc<size; proc++) { 4705 if (!len_s[proc]) continue; 4706 i = owners[proc]; 4707 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4708 k++; 4709 } 4710 4711 /* receives and sends of j-structure are complete */ 4712 /*------------------------------------------------*/ 4713 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4714 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4715 4716 /* send and recv i-structure */ 4717 /*---------------------------*/ 4718 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4719 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4720 4721 ierr = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr); 4722 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4723 for (proc=0,k=0; proc<size; proc++) { 4724 if (!len_s[proc]) continue; 4725 /* form outgoing message for i-structure: 4726 buf_si[0]: nrows to be sent 4727 [1:nrows]: row index (global) 4728 [nrows+1:2*nrows+1]: i-structure index 4729 */ 4730 /*-------------------------------------------*/ 4731 nrows = len_si[proc]/2 - 1; 4732 buf_si_i = buf_si + nrows+1; 4733 buf_si[0] = nrows; 4734 buf_si_i[0] = 0; 4735 nrows = 0; 4736 for (i=owners[proc]; i<owners[proc+1]; i++) { 4737 anzi = ai[i+1] - ai[i]; 4738 if (anzi) { 4739 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4740 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4741 nrows++; 4742 } 4743 } 4744 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4745 k++; 4746 buf_si += len_si[proc]; 4747 } 4748 4749 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4750 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4751 4752 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4753 for (i=0; i<merge->nrecv; i++) { 4754 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4755 } 4756 4757 ierr = PetscFree(len_si);CHKERRQ(ierr); 4758 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4759 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4760 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4761 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4762 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4763 ierr = PetscFree(status);CHKERRQ(ierr); 4764 4765 /* compute a local seq matrix in each processor */ 4766 /*----------------------------------------------*/ 4767 /* allocate bi array and free space for accumulating nonzero column info */ 4768 ierr = PetscMalloc1((m+1),&bi);CHKERRQ(ierr); 4769 bi[0] = 0; 4770 4771 /* create and initialize a linked list */ 4772 nlnk = N+1; 4773 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4774 4775 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4776 len = ai[owners[rank+1]] - ai[owners[rank]]; 4777 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4778 4779 current_space = free_space; 4780 4781 /* determine symbolic info for each local row */ 4782 ierr = 
PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = NULL;
  merge->coj          = NULL;
  merge->owners_co    = NULL;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
/*@C
      MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix
.    m - number of local rows (or PETSC_DECIDE)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
    } else {
      ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
    ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
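/*
   Illustrative usage sketch (not part of the original source): a minimal, hedged example of how
   MatCreateMPIAIJSumSeqAIJ() above is typically called. Every rank assembles a SeqAIJ matrix with
   the same dimensions, and the routine adds the per-rank contributions into one parallel MPIAIJ
   matrix. The matrix size, the entries, and the helper name ExampleSumSeqAIJ are invented for the
   sketch; the block is guarded so it is never compiled.
*/
#if 0
static PetscErrorCode ExampleSumSeqAIJ(MPI_Comm comm,Mat *Ampi)
{
  Mat            Aseq;
  PetscInt       i = 0;
  PetscScalar    one = 1.0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* each rank builds its own 10x10 sequential contribution; the dimensions must match on all ranks */
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,10,10,1,NULL,&Aseq);CHKERRQ(ierr);
  ierr = MatSetValues(Aseq,1,&i,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Aseq,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Aseq,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* the contributions of all ranks are summed into a single parallel MPIAIJ matrix */
  ierr = MatCreateMPIAIJSumSeqAIJ(comm,Aseq,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,Ampi);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif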
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMat"
/*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize()

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.
A_loc - the local sequential matrix generated 4940 4941 Level: developer 4942 4943 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4944 4945 @*/ 4946 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4947 { 4948 PetscErrorCode ierr; 4949 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4950 Mat_SeqAIJ *mat,*a,*b; 4951 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4952 MatScalar *aa,*ba,*cam; 4953 PetscScalar *ca; 4954 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4955 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4956 PetscBool match; 4957 MPI_Comm comm; 4958 PetscMPIInt size; 4959 4960 PetscFunctionBegin; 4961 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4962 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4963 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4964 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4965 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 4966 4967 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4968 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4969 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4970 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4971 aa = a->a; ba = b->a; 4972 if (scall == MAT_INITIAL_MATRIX) { 4973 if (size == 1) { 4974 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 4975 PetscFunctionReturn(0); 4976 } 4977 4978 ierr = PetscMalloc1((1+am),&ci);CHKERRQ(ierr); 4979 ci[0] = 0; 4980 for (i=0; i<am; i++) { 4981 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4982 } 4983 ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr); 4984 ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr); 4985 k = 0; 4986 for (i=0; i<am; i++) { 4987 ncols_o = bi[i+1] - bi[i]; 4988 ncols_d = ai[i+1] - ai[i]; 4989 /* off-diagonal portion of A */ 4990 for (jo=0; jo<ncols_o; jo++) { 4991 col = cmap[*bj]; 4992 if (col >= cstart) break; 4993 cj[k] = col; bj++; 4994 ca[k++] = *ba++; 4995 } 4996 /* diagonal portion of A */ 4997 for (j=0; j<ncols_d; j++) { 4998 cj[k] = cstart + *aj++; 4999 ca[k++] = *aa++; 5000 } 5001 /* off-diagonal portion of A */ 5002 for (j=jo; j<ncols_o; j++) { 5003 cj[k] = cmap[*bj++]; 5004 ca[k++] = *ba++; 5005 } 5006 } 5007 /* put together the new matrix */ 5008 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5009 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5010 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5011 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5012 mat->free_a = PETSC_TRUE; 5013 mat->free_ij = PETSC_TRUE; 5014 mat->nonew = 0; 5015 } else if (scall == MAT_REUSE_MATRIX) { 5016 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5017 ci = mat->i; cj = mat->j; cam = mat->a; 5018 for (i=0; i<am; i++) { 5019 /* off-diagonal portion of A */ 5020 ncols_o = bi[i+1] - bi[i]; 5021 for (jo=0; jo<ncols_o; jo++) { 5022 col = cmap[*bj]; 5023 if (col >= cstart) break; 5024 *cam++ = *ba++; bj++; 5025 } 5026 /* diagonal portion of A */ 5027 ncols_d = ai[i+1] - ai[i]; 5028 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5029 /* off-diagonal portion of A */ 5030 for (j=jo; j<ncols_o; j++) { 5031 *cam++ = *ba++; bj++; 5032 } 5033 } 5034 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5035 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5036 PetscFunctionReturn(0); 5037 } 5038 5039 #undef __FUNCT__ 5040 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5041 /*@C 5042 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5043 5044 Not Collective 5045 5046 Input Parameters: 5047 + A - the matrix 5048 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5049 - row, col - index sets of rows and columns to extract (or NULL) 5050 5051 Output Parameter: 5052 . A_loc - the local sequential matrix generated 5053 5054 Level: developer 5055 5056 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5057 5058 @*/ 5059 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5060 { 5061 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5062 PetscErrorCode ierr; 5063 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5064 IS isrowa,iscola; 5065 Mat *aloc; 5066 PetscBool match; 5067 5068 PetscFunctionBegin; 5069 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5070 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5071 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5072 if (!row) { 5073 start = A->rmap->rstart; end = A->rmap->rend; 5074 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5075 } else { 5076 isrowa = *row; 5077 } 5078 if (!col) { 5079 start = A->cmap->rstart; 5080 cmap = a->garray; 5081 nzA = a->A->cmap->n; 5082 nzB = a->B->cmap->n; 5083 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5084 ncols = 0; 5085 for (i=0; i<nzB; i++) { 5086 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5087 else break; 5088 } 5089 imark = i; 5090 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5091 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5092 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5093 } else { 5094 iscola = *col; 5095 } 5096 if (scall != MAT_INITIAL_MATRIX) { 5097 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5098 aloc[0] = *A_loc; 5099 } 5100 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5101 *A_loc = aloc[0]; 5102 ierr = PetscFree(aloc);CHKERRQ(ierr); 5103 if (!row) { 5104 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5105 } 5106 if (!col) { 5107 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5108 } 5109 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5110 PetscFunctionReturn(0); 5111 } 5112 5113 #undef __FUNCT__ 5114 #define __FUNCT__ "MatGetBrowsOfAcols" 5115 /*@C 5116 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to 
nonzero columns of local A 5117 5118 Collective on Mat 5119 5120 Input Parameters: 5121 + A,B - the matrices in mpiaij format 5122 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5123 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5124 5125 Output Parameter: 5126 + rowb, colb - index sets of rows and columns of B to extract 5127 - B_seq - the sequential matrix generated 5128 5129 Level: developer 5130 5131 @*/ 5132 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5133 { 5134 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5135 PetscErrorCode ierr; 5136 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5137 IS isrowb,iscolb; 5138 Mat *bseq=NULL; 5139 5140 PetscFunctionBegin; 5141 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5142 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5143 } 5144 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5145 5146 if (scall == MAT_INITIAL_MATRIX) { 5147 start = A->cmap->rstart; 5148 cmap = a->garray; 5149 nzA = a->A->cmap->n; 5150 nzB = a->B->cmap->n; 5151 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5152 ncols = 0; 5153 for (i=0; i<nzB; i++) { /* row < local row index */ 5154 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5155 else break; 5156 } 5157 imark = i; 5158 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5159 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5160 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5161 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5162 } else { 5163 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5164 isrowb = *rowb; iscolb = *colb; 5165 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5166 bseq[0] = *B_seq; 5167 } 5168 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5169 *B_seq = bseq[0]; 5170 ierr = PetscFree(bseq);CHKERRQ(ierr); 5171 if (!rowb) { 5172 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5173 } else { 5174 *rowb = isrowb; 5175 } 5176 if (!colb) { 5177 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5178 } else { 5179 *colb = iscolb; 5180 } 5181 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5182 PetscFunctionReturn(0); 5183 } 5184 5185 #undef __FUNCT__ 5186 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5187 /* 5188 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5189 of the OFF-DIAGONAL portion of local A 5190 5191 Collective on Mat 5192 5193 Input Parameters: 5194 + A,B - the matrices in mpiaij format 5195 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5196 5197 Output Parameter: 5198 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5199 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5200 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5201 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5202 5203 Level: developer 5204 5205 */ 5206 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5207 { 5208 VecScatter_MPI_General *gen_to,*gen_from; 5209 PetscErrorCode ierr; 5210 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5211 Mat_SeqAIJ *b_oth; 5212 VecScatter ctx =a->Mvctx; 5213 MPI_Comm comm; 5214 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5215 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5216 PetscScalar *rvalues,*svalues; 5217 MatScalar *b_otha,*bufa,*bufA; 5218 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5219 MPI_Request *rwaits = NULL,*swaits = NULL; 5220 MPI_Status *sstatus,rstatus; 5221 PetscMPIInt jj,size; 5222 PetscInt *cols,sbs,rbs; 5223 PetscScalar *vals; 5224 5225 PetscFunctionBegin; 5226 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5227 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5228 if (size == 1) PetscFunctionReturn(0); 5229 5230 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5231 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5232 } 5233 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5234 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5235 5236 gen_to = (VecScatter_MPI_General*)ctx->todata; 5237 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5238 rvalues = gen_from->values; /* holds the length of receiving row */ 5239 svalues = gen_to->values; /* holds the length of sending row */ 5240 nrecvs = gen_from->n; 5241 nsends = gen_to->n; 5242 5243 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5244 srow = gen_to->indices; /* local row index to be sent */ 5245 sstarts = gen_to->starts; 5246 sprocs = gen_to->procs; 5247 sstatus = gen_to->sstatus; 5248 sbs = gen_to->bs; 5249 rstarts = gen_from->starts; 5250 rprocs = gen_from->procs; 5251 rbs = gen_from->bs; 5252 5253 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5254 if (scall == MAT_INITIAL_MATRIX) { 5255 /* i-array */ 5256 /*---------*/ 5257 /* post receives */ 5258 for (i=0; i<nrecvs; i++) { 5259 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5260 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5261 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5262 } 5263 5264 /* pack the outgoing message */ 5265 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5266 5267 sstartsj[0] = 0; 5268 rstartsj[0] = 0; 5269 len = 0; /* total length of j or a array to be sent */ 5270 k = 0; 5271 for (i=0; i<nsends; i++) { 5272 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5273 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5274 for (j=0; j<nrows; j++) { 5275 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5276 for (l=0; l<sbs; l++) { 5277 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5278 5279 rowlen[j*sbs+l] = ncols; 5280 5281 len += ncols; 5282 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5283 } 5284 k++; 5285 } 5286 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5287 5288 
sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5289 } 5290 /* recvs and sends of i-array are completed */ 5291 i = nrecvs; 5292 while (i--) { 5293 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5294 } 5295 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5296 5297 /* allocate buffers for sending j and a arrays */ 5298 ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr); 5299 ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr); 5300 5301 /* create i-array of B_oth */ 5302 ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr); 5303 5304 b_othi[0] = 0; 5305 len = 0; /* total length of j or a array to be received */ 5306 k = 0; 5307 for (i=0; i<nrecvs; i++) { 5308 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5309 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5310 for (j=0; j<nrows; j++) { 5311 b_othi[k+1] = b_othi[k] + rowlen[j]; 5312 len += rowlen[j]; k++; 5313 } 5314 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5315 } 5316 5317 /* allocate space for j and a arrrays of B_oth */ 5318 ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr); 5319 ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr); 5320 5321 /* j-array */ 5322 /*---------*/ 5323 /* post receives of j-array */ 5324 for (i=0; i<nrecvs; i++) { 5325 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5326 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5327 } 5328 5329 /* pack the outgoing message j-array */ 5330 k = 0; 5331 for (i=0; i<nsends; i++) { 5332 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5333 bufJ = bufj+sstartsj[i]; 5334 for (j=0; j<nrows; j++) { 5335 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5336 for (ll=0; ll<sbs; ll++) { 5337 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5338 for (l=0; l<ncols; l++) { 5339 *bufJ++ = cols[l]; 5340 } 5341 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5342 } 5343 } 5344 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5345 } 5346 5347 /* recvs and sends of j-array are completed */ 5348 i = nrecvs; 5349 while (i--) { 5350 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5351 } 5352 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5353 } else if (scall == MAT_REUSE_MATRIX) { 5354 sstartsj = *startsj_s; 5355 rstartsj = *startsj_r; 5356 bufa = *bufa_ptr; 5357 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5358 b_otha = b_oth->a; 5359 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5360 5361 /* a-array */ 5362 /*---------*/ 5363 /* post receives of a-array */ 5364 for (i=0; i<nrecvs; i++) { 5365 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5366 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5367 } 5368 5369 /* pack the outgoing message a-array */ 5370 k = 0; 5371 for (i=0; i<nsends; i++) { 5372 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5373 bufA = bufa+sstartsj[i]; 5374 for (j=0; j<nrows; j++) { 5375 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5376 for (ll=0; ll<sbs; ll++) { 5377 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5378 for (l=0; l<ncols; l++) { 5379 *bufA++ = vals[l]; 5380 } 5381 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5382 } 5383 } 5384 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5385 } 5386 /* recvs and sends of a-array are completed */ 5387 i = nrecvs; 5388 while (i--) { 5389 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5390 } 5391 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5392 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5393 5394 if (scall == MAT_INITIAL_MATRIX) { 5395 /* put together the new matrix */ 5396 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5397 5398 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5399 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5400 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5401 b_oth->free_a = PETSC_TRUE; 5402 b_oth->free_ij = PETSC_TRUE; 5403 b_oth->nonew = 0; 5404 5405 ierr = PetscFree(bufj);CHKERRQ(ierr); 5406 if (!startsj_s || !bufa_ptr) { 5407 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5408 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5409 } else { 5410 *startsj_s = sstartsj; 5411 *startsj_r = rstartsj; 5412 *bufa_ptr = bufa; 5413 } 5414 } 5415 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5416 PetscFunctionReturn(0); 5417 } 5418 5419 #undef __FUNCT__ 5420 #define __FUNCT__ "MatGetCommunicationStructs" 5421 /*@C 5422 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5423 5424 Not Collective 5425 5426 Input Parameters: 5427 . A - The matrix in mpiaij format 5428 5429 Output Parameter: 5430 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5431 . 
colmap - A map from global column index to local index into lvec 5432 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5433 5434 Level: developer 5435 5436 @*/ 5437 #if defined(PETSC_USE_CTABLE) 5438 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5439 #else 5440 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5441 #endif 5442 { 5443 Mat_MPIAIJ *a; 5444 5445 PetscFunctionBegin; 5446 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5447 PetscValidPointer(lvec, 2); 5448 PetscValidPointer(colmap, 3); 5449 PetscValidPointer(multScatter, 4); 5450 a = (Mat_MPIAIJ*) A->data; 5451 if (lvec) *lvec = a->lvec; 5452 if (colmap) *colmap = a->colmap; 5453 if (multScatter) *multScatter = a->Mvctx; 5454 PetscFunctionReturn(0); 5455 } 5456 5457 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5458 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5459 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5460 #if defined(PETSC_HAVE_ELEMENTAL) 5461 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5462 #endif 5463 5464 #undef __FUNCT__ 5465 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5466 /* 5467 Computes (B'*A')' since computing B*A directly is untenable 5468 5469 n p p 5470 ( ) ( ) ( ) 5471 m ( A ) * n ( B ) = m ( C ) 5472 ( ) ( ) ( ) 5473 5474 */ 5475 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5476 { 5477 PetscErrorCode ierr; 5478 Mat At,Bt,Ct; 5479 5480 PetscFunctionBegin; 5481 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5482 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5483 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5484 ierr = MatDestroy(&At);CHKERRQ(ierr); 5485 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5486 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5487 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5488 PetscFunctionReturn(0); 5489 } 5490 5491 #undef __FUNCT__ 5492 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5493 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5494 { 5495 PetscErrorCode ierr; 5496 PetscInt m=A->rmap->n,n=B->cmap->n; 5497 Mat Cmat; 5498 5499 PetscFunctionBegin; 5500 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5501 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5502 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5503 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5504 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5505 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5506 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5507 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5508 5509 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5510 5511 *C = Cmat; 5512 PetscFunctionReturn(0); 5513 } 5514 5515 /* ----------------------------------------------------------------*/ 5516 #undef __FUNCT__ 5517 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5518 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5519 { 5520 PetscErrorCode ierr; 5521 5522 PetscFunctionBegin; 5523 if (scall == MAT_INITIAL_MATRIX) { 5524 ierr = 
PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5525 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5526 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5527 } 5528 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5529 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5530 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5531 PetscFunctionReturn(0); 5532 } 5533 5534 #if defined(PETSC_HAVE_MUMPS) 5535 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5536 #endif 5537 #if defined(PETSC_HAVE_PASTIX) 5538 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5539 #endif 5540 #if defined(PETSC_HAVE_SUPERLU_DIST) 5541 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5542 #endif 5543 #if defined(PETSC_HAVE_CLIQUE) 5544 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5545 #endif 5546 5547 /*MC 5548 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5549 5550 Options Database Keys: 5551 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5552 5553 Level: beginner 5554 5555 .seealso: MatCreateAIJ() 5556 M*/ 5557 5558 #undef __FUNCT__ 5559 #define __FUNCT__ "MatCreate_MPIAIJ" 5560 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5561 { 5562 Mat_MPIAIJ *b; 5563 PetscErrorCode ierr; 5564 PetscMPIInt size; 5565 5566 PetscFunctionBegin; 5567 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5568 5569 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5570 B->data = (void*)b; 5571 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5572 B->assembled = PETSC_FALSE; 5573 B->insertmode = NOT_SET_VALUES; 5574 b->size = size; 5575 5576 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5577 5578 /* build cache for off array entries formed */ 5579 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5580 5581 b->donotstash = PETSC_FALSE; 5582 b->colmap = 0; 5583 b->garray = 0; 5584 b->roworiented = PETSC_TRUE; 5585 5586 /* stuff used for matrix vector multiply */ 5587 b->lvec = NULL; 5588 b->Mvctx = NULL; 5589 5590 /* stuff for MatGetRow() */ 5591 b->rowindices = 0; 5592 b->rowvalues = 0; 5593 b->getrowactive = PETSC_FALSE; 5594 5595 /* flexible pointer used in CUSP/CUSPARSE classes */ 5596 b->spptr = NULL; 5597 5598 #if defined(PETSC_HAVE_MUMPS) 5599 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5600 #endif 5601 #if defined(PETSC_HAVE_PASTIX) 5602 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5603 #endif 5604 #if defined(PETSC_HAVE_SUPERLU_DIST) 5605 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5606 #endif 5607 #if defined(PETSC_HAVE_CLIQUE) 5608 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr); 5609 #endif 5610 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5611 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5612 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5613 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5614 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5615 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5616 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5617 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5618 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5619 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5620 #if defined(PETSC_HAVE_ELEMENTAL) 5621 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5622 #endif 5623 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5624 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5625 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5626 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5627 PetscFunctionReturn(0); 5628 } 5629 5630 #undef __FUNCT__ 5631 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5632 /*@C 5633 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5634 and "off-diagonal" part of the matrix in CSR format. 5635 5636 Collective on MPI_Comm 5637 5638 Input Parameters: 5639 + comm - MPI communicator 5640 . m - number of local rows (Cannot be PETSC_DECIDE) 5641 . n - This value should be the same as the local size used in creating the 5642 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5643 calculated if N is given) For square matrices n is almost always m. 5644 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5645 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5646 . i - row indices for "diagonal" portion of matrix 5647 . j - column indices 5648 . a - matrix values 5649 . oi - row indices for "off-diagonal" portion of matrix 5650 . oj - column indices 5651 - oa - matrix values 5652 5653 Output Parameter: 5654 . mat - the matrix 5655 5656 Level: advanced 5657 5658 Notes: 5659 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5660 must free the arrays once the matrix has been destroyed and not before. 5661 5662 The i and j indices are 0 based 5663 5664 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5665 5666 This sets local rows and cannot be used to set off-processor values. 5667 5668 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5669 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5670 not easily support in-place reassembly. 
It is recommended to use MatSetValues() (or a variant thereof) because 5671 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5672 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5673 communication if it is known that only local entries will be set. 5674 5675 .keywords: matrix, aij, compressed row, sparse, parallel 5676 5677 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5678 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5679 C@*/ 5680 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5681 { 5682 PetscErrorCode ierr; 5683 Mat_MPIAIJ *maij; 5684 5685 PetscFunctionBegin; 5686 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5687 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5688 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5689 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5690 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5691 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5692 maij = (Mat_MPIAIJ*) (*mat)->data; 5693 5694 (*mat)->preallocated = PETSC_TRUE; 5695 5696 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5697 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5698 5699 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5700 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5701 5702 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5703 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5704 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5705 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5706 5707 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5708 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5709 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5710 PetscFunctionReturn(0); 5711 } 5712 5713 /* 5714 Special version for direct calls from Fortran 5715 */ 5716 #include <petsc-private/fortranimpl.h> 5717 5718 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5719 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5720 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5721 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5722 #endif 5723 5724 /* Change these macros so can be used in void function */ 5725 #undef CHKERRQ 5726 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5727 #undef SETERRQ2 5728 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5729 #undef SETERRQ3 5730 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5731 #undef SETERRQ 5732 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5733 5734 #undef __FUNCT__ 5735 #define __FUNCT__ "matsetvaluesmpiaij_" 5736 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5737 { 5738 Mat mat = *mmat; 5739 PetscInt m = *mm, n = *mn; 5740 InsertMode addv = *maddv; 5741 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5742 PetscScalar value; 5743 PetscErrorCode ierr; 
5744 5745 MatCheckPreallocated(mat,1); 5746 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5747 5748 #if defined(PETSC_USE_DEBUG) 5749 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5750 #endif 5751 { 5752 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5753 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5754 PetscBool roworiented = aij->roworiented; 5755 5756 /* Some Variables required in the macro */ 5757 Mat A = aij->A; 5758 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5759 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5760 MatScalar *aa = a->a; 5761 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5762 Mat B = aij->B; 5763 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5764 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5765 MatScalar *ba = b->a; 5766 5767 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5768 PetscInt nonew = a->nonew; 5769 MatScalar *ap1,*ap2; 5770 5771 PetscFunctionBegin; 5772 for (i=0; i<m; i++) { 5773 if (im[i] < 0) continue; 5774 #if defined(PETSC_USE_DEBUG) 5775 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5776 #endif 5777 if (im[i] >= rstart && im[i] < rend) { 5778 row = im[i] - rstart; 5779 lastcol1 = -1; 5780 rp1 = aj + ai[row]; 5781 ap1 = aa + ai[row]; 5782 rmax1 = aimax[row]; 5783 nrow1 = ailen[row]; 5784 low1 = 0; 5785 high1 = nrow1; 5786 lastcol2 = -1; 5787 rp2 = bj + bi[row]; 5788 ap2 = ba + bi[row]; 5789 rmax2 = bimax[row]; 5790 nrow2 = bilen[row]; 5791 low2 = 0; 5792 high2 = nrow2; 5793 5794 for (j=0; j<n; j++) { 5795 if (roworiented) value = v[i*n+j]; 5796 else value = v[i+j*m]; 5797 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5798 if (in[j] >= cstart && in[j] < cend) { 5799 col = in[j] - cstart; 5800 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5801 } else if (in[j] < 0) continue; 5802 #if defined(PETSC_USE_DEBUG) 5803 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5804 #endif 5805 else { 5806 if (mat->was_assembled) { 5807 if (!aij->colmap) { 5808 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5809 } 5810 #if defined(PETSC_USE_CTABLE) 5811 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5812 col--; 5813 #else 5814 col = aij->colmap[in[j]] - 1; 5815 #endif 5816 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5817 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5818 col = in[j]; 5819 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5820 B = aij->B; 5821 b = (Mat_SeqAIJ*)B->data; 5822 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5823 rp2 = bj + bi[row]; 5824 ap2 = ba + bi[row]; 5825 rmax2 = bimax[row]; 5826 nrow2 = bilen[row]; 5827 low2 = 0; 5828 high2 = nrow2; 5829 bm = aij->B->rmap->n; 5830 ba = b->a; 5831 } 5832 } else col = in[j]; 5833 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5834 } 5835 } 5836 } else if (!aij->donotstash) { 5837 if (roworiented) { 5838 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5839 } else { 5840 ierr = 
MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5841 } 5842 } 5843 } 5844 } 5845 PetscFunctionReturnVoid(); 5846 } 5847 5848
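/*
   Illustrative usage sketch (not part of the original source): the Notes for
   MatCreateMPIAIJWithSplitArrays() above recommend assembling with MatCreateAIJ() and MatSetValues()
   instead of passing pre-split CSR arrays. The hedged sketch below shows that recommended path and
   then gathers this rank's rows with MatMPIAIJGetLocalMat(). The matrix size, the simple diagonal
   stencil, and the helper name ExampleRecommendedAssembly are invented for the example; the block is
   guarded so it is never compiled.
*/
#if 0
static PetscErrorCode ExampleRecommendedAssembly(MPI_Comm comm,Mat *A,Mat *Aloc)
{
  PetscInt       i,rstart,rend,N = 100;
  PetscScalar    two = 2.0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* preallocate roughly 3 diagonal-block and 2 off-diagonal-block nonzeros per row */
  ierr = MatCreateAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,N,N,3,NULL,2,NULL,A);CHKERRQ(ierr);
  /* only locally owned rows are set below, so off-process stash communication can be skipped */
  ierr = MatSetOption(*A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*A,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {
    ierr = MatSetValues(*A,1,&i,1,&i,&two,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* gather this rank's rows (all global columns) into a sequential matrix */
  ierr = MatMPIAIJGetLocalMat(*A,MAT_INITIAL_MATRIX,Aloc);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif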