#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc-private/vecimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity; see the example sketch after these manual pages.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type
   also automatically switches over to use inodes when enough exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
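/*
   Illustrative sketch (not part of the original source): creating an AIJ matrix portably
   for any communicator size by calling both preallocation routines, as recommended in the
   manual page above.  The sizes M,N and the per-row nonzero estimates are made-up placeholders.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);         /* used when running on one process */
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);  /* used when running on several processes */
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);

   Whichever preallocation routine does not match the communicator size is a no-op, so it is
   safe (and simplest) to always call both.
*/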
#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Local utility routine that creates a mapping from the global column
    number to the local number in the off-diagonal part of the local
    storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
    a slightly higher hash table cost; without it it is not scalable (each process
    has an order N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
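/*
   Illustrative sketch (not part of the original source): how the colmap built above is
   queried later in this file, e.g. in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ().
   Entries are stored shifted by one, so after the decrement a result of -1 means
   "global column gcol is not present in the off-diagonal part".  gcol and lcol here
   are placeholder names.

#if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
#else
     lcol = aij->colmap[gcol] - 1;
#endif
*/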
/*
   The two macros below insert (or add to) a single value in the diagonal (A) or
   off-diagonal (B) SeqAIJ block: a bisection-then-linear search locates the column in the
   current row, and if the column is absent a new nonzero is inserted, possibly reallocating
   the row via MatSeqXAIJReallocateAIJ().
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
  { \
    if (col <= lastcol1)  low1 = 0; \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++;\
    a_noinsert: ; \
    ailen[row] = nrow1; \
  }


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
  }
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A                 = aij->A;
  Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa               = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B                 = aij->B;
  Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba               = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
        } else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
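/*
   Illustrative sketch (not part of the original source): the standard calling sequence that
   exercises MatSetValues_MPIAIJ() and the assembly routines below.  Off-process entries set
   in the loop are stashed and communicated during assembly.  Istart/Iend, ncols, cols and
   vals are placeholders supplied by the caller.

     for (i=Istart; i<Iend; i++) {
       ierr = MatSetValues(A,1,&i,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/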
extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ     *mat    = (Mat_MPIAIJ *) A->data;
  PetscInt       *owners = A->rmap->range;
  PetscInt       n       = A->rmap->n;
  PetscMPIInt    size    = mat->size;
  PetscSF        sf;
  PetscInt       *lrows;
  PetscSFNode    *rrows;
  PetscInt       lastidx = -1, r, p = 0, len = 0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    PetscBool      found = PETSC_FALSE;
    /* Trick for efficient searching for sorted rows */
    if (lastidx > idx) p = 0;
    lastidx = idx;
    for (; p < size; ++p) {
      if (idx >= owners[p] && idx < owners[p+1]) {
        rrows[r].rank  = p;
        rrows[r].index = rows[r] - owners[p];
        found = PETSC_TRUE;
        break;
      }
    }
    if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }
  /* Must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(mat->B, len, lrows, 0.0, 0, 0);CHKERRQ(ierr);
  if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
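/*
   Illustrative sketch (not part of the original source): typical use of MatZeroRows() to
   impose Dirichlet boundary conditions, which for MPIAIJ matrices dispatches to the routine
   above.  nbc, bcrows and the vectors x,b are placeholders from the caller; each process may
   pass any global rows, owned or not.  Passing diag=1.0 keeps a unit diagonal and adjusts b
   so that the zeroed rows retain the values given in x.

     ierr = MatZeroRows(A,nbc,bcrows,1.0,x,b);CHKERRQ(ierr);
*/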
#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,n = A->rmap->n,lastidx = -1;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    PetscBool      found = PETSC_FALSE;
    /* Trick for efficient searching for sorted rows */
    if (lastidx > idx) p = 0;
    lastidx = idx;
    for (; p < size; ++p) {
      if (idx >= owners[p] && idx < owners[p+1]) {
        rrows[r].rank  = p;
        rrows[r].index = rows[r] - owners[p];
        found = PETSC_TRUE;
        break;
      }
    }
    if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually added into yy until the VecScatterEnd() below */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
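/*
   Note added for clarity (not part of the original source): the multiply routines above all
   follow the same pattern for y = A*x with the MPIAIJ storage A = [A_diag | B_off]:

     VecScatterBegin(...)        start gathering the needed ghost entries of x into a->lvec
     y  = A_diag * x_local       local work that overlaps the communication
     VecScatterEnd(...)          finish the gather into a->lvec
     y += B_off * a->lvec        apply the off-diagonal part to the ghost values

   so communication of ghost values is overlapped with the diagonal-block computation.
*/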
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the process with the largest number of nonzeros */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
  PetscFunctionReturn(0);
}
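/*
   Illustrative sketch (not part of the original source): saving an MPIAIJ matrix in the
   binary format produced by MatView_MPIAIJ_Binary() above.  The file name is a placeholder;
   the matrix can later be read back with MatLoad() on any number of processes.

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/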
#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    if (mat->rmap->N > 1024) {
      PetscBool flg = PETSC_FALSE;

      ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr);
      if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
    }

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr); 1405 ct = cols; 1406 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1407 for (i=0; i<m; i++) { 1408 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1409 row++; 1410 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1411 } 1412 ierr = PetscFree(ct);CHKERRQ(ierr); 1413 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1414 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1415 /* 1416 Everyone has to call to draw the matrix since the graphics waits are 1417 synchronized across all processors that share the PetscDraw object 1418 */ 1419 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1420 if (!rank) { 1421 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1422 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1423 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1424 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1425 } 1426 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1427 ierr = MatDestroy(&A);CHKERRQ(ierr); 1428 } 1429 PetscFunctionReturn(0); 1430 } 1431 1432 #undef __FUNCT__ 1433 #define __FUNCT__ "MatView_MPIAIJ" 1434 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1435 { 1436 PetscErrorCode ierr; 1437 PetscBool iascii,isdraw,issocket,isbinary; 1438 1439 PetscFunctionBegin; 1440 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1441 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1442 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1443 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1444 if (iascii || isdraw || isbinary || issocket) { 1445 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1446 } 1447 PetscFunctionReturn(0); 1448 } 1449 1450 #undef __FUNCT__ 1451 #define __FUNCT__ "MatSOR_MPIAIJ" 1452 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1453 { 1454 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1455 PetscErrorCode ierr; 1456 Vec bb1 = 0; 1457 PetscBool hasop; 1458 1459 PetscFunctionBegin; 1460 if (flag == SOR_APPLY_UPPER) { 1461 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1462 PetscFunctionReturn(0); 1463 } 1464 1465 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1466 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1467 } 1468 1469 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1470 if (flag & SOR_ZERO_INITIAL_GUESS) { 1471 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1472 its--; 1473 } 1474 1475 while (its--) { 1476 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1477 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1478 1479 /* update rhs: bb1 = bb - B*x */ 1480 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1481 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1482 1483 /* local sweep */ 1484 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1485 } 1486 } else if (flag & 
SOR_LOCAL_FORWARD_SWEEP) { 1487 if (flag & SOR_ZERO_INITIAL_GUESS) { 1488 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1489 its--; 1490 } 1491 while (its--) { 1492 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1493 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1494 1495 /* update rhs: bb1 = bb - B*x */ 1496 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1497 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1498 1499 /* local sweep */ 1500 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1501 } 1502 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1503 if (flag & SOR_ZERO_INITIAL_GUESS) { 1504 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1505 its--; 1506 } 1507 while (its--) { 1508 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1509 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1510 1511 /* update rhs: bb1 = bb - B*x */ 1512 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1513 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1514 1515 /* local sweep */ 1516 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1517 } 1518 } else if (flag & SOR_EISENSTAT) { 1519 Vec xx1; 1520 1521 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1522 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1523 1524 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1525 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1526 if (!mat->diag) { 1527 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1528 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1529 } 1530 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1531 if (hasop) { 1532 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1533 } else { 1534 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1535 } 1536 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1537 1538 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1539 1540 /* local sweep */ 1541 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1542 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1543 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1544 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1545 1546 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1547 PetscFunctionReturn(0); 1548 } 1549 1550 #undef __FUNCT__ 1551 #define __FUNCT__ "MatPermute_MPIAIJ" 1552 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1553 { 1554 Mat aA,aB,Aperm; 1555 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1556 PetscScalar *aa,*ba; 1557 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1558 PetscSF rowsf,sf; 1559 IS parcolp = NULL; 1560 PetscBool done; 1561 PetscErrorCode ierr; 1562 1563 PetscFunctionBegin; 1564 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1565 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1566 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1567 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1568 1569 
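  /* Overall strategy (sketch): rwant[i] and cwant[i] give the global row/column
     that local row/column i should move to.  Star forests over the row and
     column layouts let each process deliver its own global indices to the owner
     of the permuted position, producing rdest[]/cdest[] without ever forming
     the inverse permutation explicitly. */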
/* Invert row permutation to find out where my rows should go */ 1570 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1571 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1572 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1573 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1574 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1575 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1576 1577 /* Invert column permutation to find out where my columns should go */ 1578 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1579 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1580 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1581 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1582 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1583 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1584 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1585 1586 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1587 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1588 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1589 1590 /* Find out where my gcols should go */ 1591 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1592 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1593 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1594 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1595 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1596 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1597 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1598 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1599 1600 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1601 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1602 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1603 for (i=0; i<m; i++) { 1604 PetscInt row = rdest[i],rowner; 1605 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1606 for (j=ai[i]; j<ai[i+1]; j++) { 1607 PetscInt cowner,col = cdest[aj[j]]; 1608 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1609 if (rowner == cowner) dnnz[i]++; 1610 else onnz[i]++; 1611 } 1612 for (j=bi[i]; j<bi[i+1]; j++) { 1613 PetscInt cowner,col = gcdest[bj[j]]; 1614 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1615 if (rowner == cowner) dnnz[i]++; 1616 else onnz[i]++; 1617 } 1618 } 1619 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1620 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1621 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1622 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1623 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1624 1625 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1626 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1627 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1628 for (i=0; i<m; i++) { 1629 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1630 PetscInt j0,rowlen; 1631 rowlen = ai[i+1] - ai[i]; 1632 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be 
larger than number of rows m, so sum in batches */ 1633 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1634 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1635 } 1636 rowlen = bi[i+1] - bi[i]; 1637 for (j0=j=0; j<rowlen; j0=j) { 1638 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1639 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1640 } 1641 } 1642 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1643 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1644 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1645 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1646 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1647 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1648 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1649 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1650 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1651 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1652 *B = Aperm; 1653 PetscFunctionReturn(0); 1654 } 1655 1656 #undef __FUNCT__ 1657 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1658 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1659 { 1660 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1661 Mat A = mat->A,B = mat->B; 1662 PetscErrorCode ierr; 1663 PetscReal isend[5],irecv[5]; 1664 1665 PetscFunctionBegin; 1666 info->block_size = 1.0; 1667 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1668 1669 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1670 isend[3] = info->memory; isend[4] = info->mallocs; 1671 1672 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1673 1674 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1675 isend[3] += info->memory; isend[4] += info->mallocs; 1676 if (flag == MAT_LOCAL) { 1677 info->nz_used = isend[0]; 1678 info->nz_allocated = isend[1]; 1679 info->nz_unneeded = isend[2]; 1680 info->memory = isend[3]; 1681 info->mallocs = isend[4]; 1682 } else if (flag == MAT_GLOBAL_MAX) { 1683 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1684 1685 info->nz_used = irecv[0]; 1686 info->nz_allocated = irecv[1]; 1687 info->nz_unneeded = irecv[2]; 1688 info->memory = irecv[3]; 1689 info->mallocs = irecv[4]; 1690 } else if (flag == MAT_GLOBAL_SUM) { 1691 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1692 1693 info->nz_used = irecv[0]; 1694 info->nz_allocated = irecv[1]; 1695 info->nz_unneeded = irecv[2]; 1696 info->memory = irecv[3]; 1697 info->mallocs = irecv[4]; 1698 } 1699 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1700 info->fill_ratio_needed = 0; 1701 info->factor_mallocs = 0; 1702 PetscFunctionReturn(0); 1703 } 1704 1705 #undef __FUNCT__ 1706 #define __FUNCT__ "MatSetOption_MPIAIJ" 1707 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1708 { 1709 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1710 PetscErrorCode ierr; 1711 1712 PetscFunctionBegin; 1713 switch (op) { 1714 case MAT_NEW_NONZERO_LOCATIONS: 1715 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1716 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1717 case MAT_KEEP_NONZERO_PATTERN: 1718 case MAT_NEW_NONZERO_LOCATION_ERR: 1719 case MAT_USE_INODES: 1720 case MAT_IGNORE_ZERO_ENTRIES: 1721 MatCheckPreallocated(A,1); 
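    /* these options are forwarded to both local blocks so the diagonal part A and
       the off-diagonal part B stay consistent; a user-level call such as
       MatSetOption(mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE) (illustrative) ends up here */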
1722 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1723 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1724 break; 1725 case MAT_ROW_ORIENTED: 1726 a->roworiented = flg; 1727 1728 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1729 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1730 break; 1731 case MAT_NEW_DIAGONALS: 1732 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1733 break; 1734 case MAT_IGNORE_OFF_PROC_ENTRIES: 1735 a->donotstash = flg; 1736 break; 1737 case MAT_SPD: 1738 A->spd_set = PETSC_TRUE; 1739 A->spd = flg; 1740 if (flg) { 1741 A->symmetric = PETSC_TRUE; 1742 A->structurally_symmetric = PETSC_TRUE; 1743 A->symmetric_set = PETSC_TRUE; 1744 A->structurally_symmetric_set = PETSC_TRUE; 1745 } 1746 break; 1747 case MAT_SYMMETRIC: 1748 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1749 break; 1750 case MAT_STRUCTURALLY_SYMMETRIC: 1751 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1752 break; 1753 case MAT_HERMITIAN: 1754 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1755 break; 1756 case MAT_SYMMETRY_ETERNAL: 1757 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1758 break; 1759 default: 1760 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1761 } 1762 PetscFunctionReturn(0); 1763 } 1764 1765 #undef __FUNCT__ 1766 #define __FUNCT__ "MatGetRow_MPIAIJ" 1767 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1768 { 1769 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1770 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1771 PetscErrorCode ierr; 1772 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1773 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1774 PetscInt *cmap,*idx_p; 1775 1776 PetscFunctionBegin; 1777 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1778 mat->getrowactive = PETSC_TRUE; 1779 1780 if (!mat->rowvalues && (idx || v)) { 1781 /* 1782 allocate enough space to hold information from the longest row. 
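       (that is, the maximum over local rows of the diagonal-block row length
        plus the off-diagonal-block row length, computed in the loop below)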
1783 */ 1784 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1785 PetscInt max = 1,tmp; 1786 for (i=0; i<matin->rmap->n; i++) { 1787 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1788 if (max < tmp) max = tmp; 1789 } 1790 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1791 } 1792 1793 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1794 lrow = row - rstart; 1795 1796 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1797 if (!v) {pvA = 0; pvB = 0;} 1798 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1799 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1800 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1801 nztot = nzA + nzB; 1802 1803 cmap = mat->garray; 1804 if (v || idx) { 1805 if (nztot) { 1806 /* Sort by increasing column numbers, assuming A and B already sorted */ 1807 PetscInt imark = -1; 1808 if (v) { 1809 *v = v_p = mat->rowvalues; 1810 for (i=0; i<nzB; i++) { 1811 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1812 else break; 1813 } 1814 imark = i; 1815 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1816 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1817 } 1818 if (idx) { 1819 *idx = idx_p = mat->rowindices; 1820 if (imark > -1) { 1821 for (i=0; i<imark; i++) { 1822 idx_p[i] = cmap[cworkB[i]]; 1823 } 1824 } else { 1825 for (i=0; i<nzB; i++) { 1826 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1827 else break; 1828 } 1829 imark = i; 1830 } 1831 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1832 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1833 } 1834 } else { 1835 if (idx) *idx = 0; 1836 if (v) *v = 0; 1837 } 1838 } 1839 *nz = nztot; 1840 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1841 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1842 PetscFunctionReturn(0); 1843 } 1844 1845 #undef __FUNCT__ 1846 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1847 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1848 { 1849 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1850 1851 PetscFunctionBegin; 1852 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1853 aij->getrowactive = PETSC_FALSE; 1854 PetscFunctionReturn(0); 1855 } 1856 1857 #undef __FUNCT__ 1858 #define __FUNCT__ "MatNorm_MPIAIJ" 1859 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1860 { 1861 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1862 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1863 PetscErrorCode ierr; 1864 PetscInt i,j,cstart = mat->cmap->rstart; 1865 PetscReal sum = 0.0; 1866 MatScalar *v; 1867 1868 PetscFunctionBegin; 1869 if (aij->size == 1) { 1870 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1871 } else { 1872 if (type == NORM_FROBENIUS) { 1873 v = amat->a; 1874 for (i=0; i<amat->nz; i++) { 1875 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1876 } 1877 v = bmat->a; 1878 for (i=0; i<bmat->nz; i++) { 1879 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1880 } 1881 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1882 *norm = PetscSqrtReal(*norm); 1883 } else if (type == NORM_1) { /* max column norm */ 1884 PetscReal *tmp,*tmp2; 1885 PetscInt *jj,*garray = aij->garray; 1886 ierr = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr); 1887 ierr = 
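      /* tmp accumulates this process's |a_ij| column sums over both blocks; tmp2
         receives the global sums from the MPI_Allreduce below, and the 1-norm is
         their maximum */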
PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr); 1888 *norm = 0.0; 1889 v = amat->a; jj = amat->j; 1890 for (j=0; j<amat->nz; j++) { 1891 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1892 } 1893 v = bmat->a; jj = bmat->j; 1894 for (j=0; j<bmat->nz; j++) { 1895 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1896 } 1897 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1898 for (j=0; j<mat->cmap->N; j++) { 1899 if (tmp2[j] > *norm) *norm = tmp2[j]; 1900 } 1901 ierr = PetscFree(tmp);CHKERRQ(ierr); 1902 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1903 } else if (type == NORM_INFINITY) { /* max row norm */ 1904 PetscReal ntemp = 0.0; 1905 for (j=0; j<aij->A->rmap->n; j++) { 1906 v = amat->a + amat->i[j]; 1907 sum = 0.0; 1908 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1909 sum += PetscAbsScalar(*v); v++; 1910 } 1911 v = bmat->a + bmat->i[j]; 1912 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1913 sum += PetscAbsScalar(*v); v++; 1914 } 1915 if (sum > ntemp) ntemp = sum; 1916 } 1917 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1918 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1919 } 1920 PetscFunctionReturn(0); 1921 } 1922 1923 #undef __FUNCT__ 1924 #define __FUNCT__ "MatTranspose_MPIAIJ" 1925 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1926 { 1927 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1928 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1929 PetscErrorCode ierr; 1930 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1931 PetscInt cstart = A->cmap->rstart,ncol; 1932 Mat B; 1933 MatScalar *array; 1934 1935 PetscFunctionBegin; 1936 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1937 1938 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1939 ai = Aloc->i; aj = Aloc->j; 1940 bi = Bloc->i; bj = Bloc->j; 1941 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1942 PetscInt *d_nnz,*g_nnz,*o_nnz; 1943 PetscSFNode *oloc; 1944 PETSC_UNUSED PetscSF sf; 1945 1946 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1947 /* compute d_nnz for preallocation */ 1948 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1949 for (i=0; i<ai[ma]; i++) { 1950 d_nnz[aj[i]]++; 1951 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1952 } 1953 /* compute local off-diagonal contributions */ 1954 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1955 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1956 /* map those to global */ 1957 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1958 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1959 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1960 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1961 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1962 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1963 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1964 1965 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1966 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1967 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1968 ierr = 
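    /* give the transpose the same type as A so that AIJ subclasses are preserved */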
MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1969 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1970 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1971 } else { 1972 B = *matout; 1973 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1974 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1975 } 1976 1977 /* copy over the A part */ 1978 array = Aloc->a; 1979 row = A->rmap->rstart; 1980 for (i=0; i<ma; i++) { 1981 ncol = ai[i+1]-ai[i]; 1982 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1983 row++; 1984 array += ncol; aj += ncol; 1985 } 1986 aj = Aloc->j; 1987 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1988 1989 /* copy over the B part */ 1990 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 1991 array = Bloc->a; 1992 row = A->rmap->rstart; 1993 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1994 cols_tmp = cols; 1995 for (i=0; i<mb; i++) { 1996 ncol = bi[i+1]-bi[i]; 1997 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1998 row++; 1999 array += ncol; cols_tmp += ncol; 2000 } 2001 ierr = PetscFree(cols);CHKERRQ(ierr); 2002 2003 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2004 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2005 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2006 *matout = B; 2007 } else { 2008 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2009 } 2010 PetscFunctionReturn(0); 2011 } 2012 2013 #undef __FUNCT__ 2014 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2015 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2016 { 2017 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2018 Mat a = aij->A,b = aij->B; 2019 PetscErrorCode ierr; 2020 PetscInt s1,s2,s3; 2021 2022 PetscFunctionBegin; 2023 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2024 if (rr) { 2025 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2026 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2027 /* Overlap communication with computation. 
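       The forward scatter of rr into lvec is started here and completed only
       after the left scaling of B and the scaling of the diagonal block below.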
*/ 2028 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2029 } 2030 if (ll) { 2031 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2032 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2033 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2034 } 2035 /* scale the diagonal block */ 2036 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2037 2038 if (rr) { 2039 /* Do a scatter end and then right scale the off-diagonal block */ 2040 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2041 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2042 } 2043 PetscFunctionReturn(0); 2044 } 2045 2046 #undef __FUNCT__ 2047 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2048 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2049 { 2050 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2051 PetscErrorCode ierr; 2052 2053 PetscFunctionBegin; 2054 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2055 PetscFunctionReturn(0); 2056 } 2057 2058 #undef __FUNCT__ 2059 #define __FUNCT__ "MatEqual_MPIAIJ" 2060 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2061 { 2062 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2063 Mat a,b,c,d; 2064 PetscBool flg; 2065 PetscErrorCode ierr; 2066 2067 PetscFunctionBegin; 2068 a = matA->A; b = matA->B; 2069 c = matB->A; d = matB->B; 2070 2071 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2072 if (flg) { 2073 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2074 } 2075 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2076 PetscFunctionReturn(0); 2077 } 2078 2079 #undef __FUNCT__ 2080 #define __FUNCT__ "MatCopy_MPIAIJ" 2081 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2082 { 2083 PetscErrorCode ierr; 2084 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2085 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2086 2087 PetscFunctionBegin; 2088 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2089 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2090 /* because of the column compression in the off-processor part of the matrix a->B, 2091 the number of columns in a->B and b->B may be different, hence we cannot call 2092 the MatCopy() directly on the two parts. 
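     (MatCopy_Basic() goes through a generic MatGetRow()/MatSetValues() loop,
     so it works for any pair of nonzero patterns, only more slowly.)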
If need be, we can provide a more 2093 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2094 then copying the submatrices */ 2095 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2096 } else { 2097 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2098 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2099 } 2100 PetscFunctionReturn(0); 2101 } 2102 2103 #undef __FUNCT__ 2104 #define __FUNCT__ "MatSetUp_MPIAIJ" 2105 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2106 { 2107 PetscErrorCode ierr; 2108 2109 PetscFunctionBegin; 2110 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2111 PetscFunctionReturn(0); 2112 } 2113 2114 #undef __FUNCT__ 2115 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2116 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2117 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2118 { 2119 PetscInt i,m=Y->rmap->N; 2120 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2121 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2122 const PetscInt *xi = x->i,*yi = y->i; 2123 2124 PetscFunctionBegin; 2125 /* Set the number of nonzeros in the new matrix */ 2126 for (i=0; i<m; i++) { 2127 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2128 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2129 nnz[i] = 0; 2130 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2131 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2132 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2133 nnz[i]++; 2134 } 2135 for (; k<nzy; k++) nnz[i]++; 2136 } 2137 PetscFunctionReturn(0); 2138 } 2139 2140 #undef __FUNCT__ 2141 #define __FUNCT__ "MatAXPY_MPIAIJ" 2142 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2143 { 2144 PetscErrorCode ierr; 2145 PetscInt i; 2146 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2147 PetscBLASInt bnz,one=1; 2148 Mat_SeqAIJ *x,*y; 2149 2150 PetscFunctionBegin; 2151 if (str == SAME_NONZERO_PATTERN) { 2152 PetscScalar alpha = a; 2153 x = (Mat_SeqAIJ*)xx->A->data; 2154 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2155 y = (Mat_SeqAIJ*)yy->A->data; 2156 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2157 x = (Mat_SeqAIJ*)xx->B->data; 2158 y = (Mat_SeqAIJ*)yy->B->data; 2159 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2160 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2161 } else if (str == SUBSET_NONZERO_PATTERN) { 2162 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2163 2164 x = (Mat_SeqAIJ*)xx->B->data; 2165 y = (Mat_SeqAIJ*)yy->B->data; 2166 if (y->xtoy && y->XtoY != xx->B) { 2167 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2168 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2169 } 2170 if (!y->xtoy) { /* get xtoy */ 2171 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2172 y->XtoY = xx->B; 2173 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2174 } 2175 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2176 } else { 2177 Mat B; 2178 PetscInt *nnz_d,*nnz_o; 2179 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2180 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2181 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2182 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2183 ierr = 
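    /* general case: build a fresh matrix B preallocated for the union of the
       nonzero patterns of Y and X, perform the AXPY into it, then swap it into
       Y's header via MatHeaderReplace() below */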
MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatConjugate_SeqAIJ(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatConjugate_MPIAIJ"
PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPIAIJ"
PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIAIJ"
PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_PBGL)

#include <boost/parallel/mpi/bsp_process_group.hpp>
#include <boost/graph/distributed/ilu_default_graph.hpp>
#include <boost/graph/distributed/ilu_0_block.hpp>
#include <boost/graph/distributed/ilu_preconditioner.hpp>
#include <boost/graph/distributed/petsc/interface.hpp>
#include <boost/multi_array.hpp>
#include <boost/parallel/distributed_property_map.hpp>

#undef __FUNCT__
#define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
/*
  This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
*/
PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
{
  namespace petsc = boost::distributed::petsc;

  namespace graph_dist = boost::graph::distributed;
  using boost::graph::distributed::ilu_default::process_group_type;
  using boost::graph::ilu_permuted;

  PetscBool      row_identity, col_identity;
  PetscContainer c;
  PetscInt       m, n, M, N;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
  ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
  ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
  if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");

  process_group_type pg;
  typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
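  /* build the distributed level graph over A's parallel layout that the PBGL
     ILU routines operate on */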
  lgraph_type *lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
  lgraph_type& level_graph = *lgraph_p;
  graph_dist::ilu_default::graph_type& graph(level_graph.graph);

  petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
  ilu_permuted(level_graph);

  /* put together the new matrix */
  ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
  ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
  ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
  ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSolve_MPIAIJ"
/*
  This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
*/
PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
{
  namespace graph_dist = boost::graph::distributed;

  typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
  lgraph_type    *lgraph_p;
  PetscContainer c;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
  ierr = VecCopy(b, x);CHKERRQ(ierr);

  PetscScalar *array_x;
  ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
  PetscInt sx;
  ierr = VecGetSize(x, &sx);CHKERRQ(ierr);

  PetscScalar *array_b;
  ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
  PetscInt sb;
  ierr = VecGetSize(b, &sb);CHKERRQ(ierr);

  lgraph_type& level_graph = *lgraph_p;
  graph_dist::ilu_default::graph_type& graph(level_graph.graph);

  typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
  array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]);
  array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]);

  typedef boost::iterator_property_map<array_ref_type::iterator,
                                       boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type;
  gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph));
  gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph));

  ilu_set_solve(*lgraph_p, vector_b, vector_x);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MatRedundant"
PetscErrorCode MatDestroy_MatRedundant(Mat A)
{
  PetscErrorCode ierr;
  Mat_Redundant  *redund;
  PetscInt       i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
    redund = a->redundant;
  } else {
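    /* in the parallel case the Mat_Redundant context hangs off the MPIAIJ header instead */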
    Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
    redund = a->redundant;
  }
  if (redund) {
    if (redund->matseq) { /* via MatGetSubMatrices() */
      ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
      ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
      ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
      ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
    } else {
      ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
      ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
      ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
      for (i=0; i<redund->nrecvs; i++) {
        ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
        ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
      }
      ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
    }

    if (redund->psubcomm) {
      ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
    }
    ierr = redund->Destroy(A);CHKERRQ(ierr);
    ierr = PetscFree(redund);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced"
PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
{
  PetscMPIInt    rank,size;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
  PetscMPIInt    *send_rank=NULL,*recv_rank=NULL,subrank,subsize;
  PetscInt       *rowrange=mat->rmap->range;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            A=aij->A,B=aij->B,C=*matredundant;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
  PetscScalar    *sbuf_a;
  PetscInt       nzlocal=a->nz+b->nz;
  PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
  PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
  PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
  MatScalar      *aworkA,*aworkB;
  PetscScalar    *vals;
  PetscMPIInt    tag1,tag2,tag3,imdex;
  MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
  MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
  MPI_Status     recv_status,*send_status;
  PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
  PetscInt       **rbuf_j=NULL;
  PetscScalar    **rbuf_a=NULL;
  Mat_Redundant  *redund=NULL;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);

  if (reuse == MAT_REUSE_MATRIX) {
    if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
    if (subsize == 1) {
      Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
      redund = c->redundant;
    } else {
      Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
      redund = c->redundant;
    }
    if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
Wrong nzlocal"); 2446 2447 nsends = redund->nsends; 2448 nrecvs = redund->nrecvs; 2449 send_rank = redund->send_rank; 2450 recv_rank = redund->recv_rank; 2451 sbuf_nz = redund->sbuf_nz; 2452 rbuf_nz = redund->rbuf_nz; 2453 sbuf_j = redund->sbuf_j; 2454 sbuf_a = redund->sbuf_a; 2455 rbuf_j = redund->rbuf_j; 2456 rbuf_a = redund->rbuf_a; 2457 } 2458 2459 if (reuse == MAT_INITIAL_MATRIX) { 2460 PetscInt nleftover,np_subcomm; 2461 2462 /* get the destination processors' id send_rank, nsends and nrecvs */ 2463 ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr); 2464 2465 np_subcomm = size/nsubcomm; 2466 nleftover = size - nsubcomm*np_subcomm; 2467 2468 /* block of codes below is specific for INTERLACED */ 2469 /* ------------------------------------------------*/ 2470 nsends = 0; nrecvs = 0; 2471 for (i=0; i<size; i++) { 2472 if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */ 2473 send_rank[nsends++] = i; 2474 recv_rank[nrecvs++] = i; 2475 } 2476 } 2477 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2478 i = size-nleftover-1; 2479 j = 0; 2480 while (j < nsubcomm - nleftover) { 2481 send_rank[nsends++] = i; 2482 i--; j++; 2483 } 2484 } 2485 2486 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2487 for (i=0; i<nleftover; i++) { 2488 recv_rank[nrecvs++] = size-nleftover+i; 2489 } 2490 } 2491 /*----------------------------------------------*/ 2492 2493 /* allocate sbuf_j, sbuf_a */ 2494 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2495 ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr); 2496 ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr); 2497 /* 2498 ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr); 2499 ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr); 2500 */ 2501 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2502 2503 /* copy mat's local entries into the buffers */ 2504 if (reuse == MAT_INITIAL_MATRIX) { 2505 rownz_max = 0; 2506 rptr = sbuf_j; 2507 cols = sbuf_j + rend-rstart + 1; 2508 vals = sbuf_a; 2509 rptr[0] = 0; 2510 for (i=0; i<rend-rstart; i++) { 2511 row = i + rstart; 2512 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2513 ncols = nzA + nzB; 2514 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2515 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2516 /* load the column indices for this row into cols */ 2517 lwrite = 0; 2518 for (l=0; l<nzB; l++) { 2519 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2520 vals[lwrite] = aworkB[l]; 2521 cols[lwrite++] = ctmp; 2522 } 2523 } 2524 for (l=0; l<nzA; l++) { 2525 vals[lwrite] = aworkA[l]; 2526 cols[lwrite++] = cstart + cworkA[l]; 2527 } 2528 for (l=0; l<nzB; l++) { 2529 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2530 vals[lwrite] = aworkB[l]; 2531 cols[lwrite++] = ctmp; 2532 } 2533 } 2534 vals += ncols; 2535 cols += ncols; 2536 rptr[i+1] = rptr[i] + ncols; 2537 if (rownz_max < ncols) rownz_max = ncols; 2538 } 2539 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2540 } else { /* only copy matrix values into sbuf_a */ 2541 rptr = sbuf_j; 2542 vals = sbuf_a; 2543 rptr[0] = 0; 2544 for (i=0; i<rend-rstart; i++) { 2545 row = i + rstart; 2546 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2547 ncols = nzA + nzB; 2548 cworkB = b->j + b->i[i]; 2549 aworkA = a->a + a->i[i]; 2550 aworkB = b->a + b->i[i]; 2551 lwrite = 0; 2552 for (l=0; 
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
      }
      for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
      }
      vals += ncols;
      rptr[i+1] = rptr[i] + ncols;
    }
  } /* end of if (reuse == MAT_INITIAL_MATRIX) */

  /* send nzlocal to others, and recv others' nzlocal */
  /*--------------------------------------------------*/
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);

    s_waits2 = s_waits3 + nsends;
    s_waits1 = s_waits2 + nsends;
    r_waits1 = s_waits1 + nsends;
    r_waits2 = r_waits1 + nrecvs;
    r_waits3 = r_waits2 + nrecvs;
  } else {
    ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);

    r_waits3 = s_waits3 + nsends;
  }

  ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    /* get new tags to keep the communication clean */
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
    ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr);

    /* post receives of others' nzlocal */
    for (i=0; i<nrecvs; i++) {
      ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
    }
    /* send nzlocal to others */
    for (i=0; i<nsends; i++) {
      sbuf_nz[i] = nzlocal;
      ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
    }
    /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
    count = nrecvs;
    while (count) {
      ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);

      recv_rank[imdex] = recv_status.MPI_SOURCE;
      /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
      ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr);

      i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */

      rbuf_nz[imdex] += i + 2;

      ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr);
      ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
      count--;
    }
    /* wait on sends of nzlocal */
    if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
    /* send mat->i,j to others, and recv from others */
    /*------------------------------------------------*/
    for (i=0; i<nsends; i++) {
      j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
      ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
    }
    /* wait on receives of mat->i,j */
    /*------------------------------*/
    count = nrecvs;
    while (count) {
      ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
      if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
      count--;
    }
    /* wait on sends of mat->i,j */
    /*---------------------------*/
    if (nsends) {
      ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
    }
  } /* end of if (reuse == MAT_INITIAL_MATRIX) */
  /* post receives, send and receive mat->a */
  /*----------------------------------------*/
  for (imdex=0; imdex<nrecvs; imdex++) {
    ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
  }
  for (i=0; i<nsends; i++) {
    ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
  }
  count = nrecvs;
  while (count) {
    ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
    if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
    count--;
  }
  if (nsends) {
    ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
  }

  ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);

  /* create redundant matrix */
  /*-------------------------*/
  if (reuse == MAT_INITIAL_MATRIX) {
    const PetscInt *range;
    PetscInt       rstart_sub,rend_sub,mloc_sub;

    /* compute rownz_max for preallocation */
    for (imdex=0; imdex<nrecvs; imdex++) {
      j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
      rptr = rbuf_j[imdex];
      for (i=0; i<j; i++) {
        ncols = rptr[i+1] - rptr[i];
        if (rownz_max < ncols) rownz_max = ncols;
      }
    }

    ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);

    /* get local size of redundant matrix
       - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED; it works for the other types, but may not be efficient! */
    ierr       = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
    rstart_sub = range[nsubcomm*subrank];
    if (subrank+1 < subsize) { /* not the last proc in subcomm */
      rend_sub = range[nsubcomm*(subrank+1)];
    } else {
      rend_sub = mat->rmap->N;
    }
    mloc_sub = rend_sub - rstart_sub;

    if (M == N) {
      ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
    } else { /* non-square matrix */
      ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
    }
    ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr);
    ierr = MatSetFromOptions(C);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
  } else {
    C = *matredundant;
  }

  /* insert local matrix entries */
  rptr = sbuf_j;
  cols = sbuf_j + rend-rstart + 1;
  vals = sbuf_a;
  for (i=0; i<rend-rstart; i++) {
    row   = i + rstart;
    ncols = rptr[i+1] - rptr[i];
    ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
    vals += ncols;
    cols += ncols;
  }
  /* insert received matrix entries */
  for (imdex=0; imdex<nrecvs; imdex++) {
    rstart = rowrange[recv_rank[imdex]];
    rend   = rowrange[recv_rank[imdex]+1];
    /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
    rptr = rbuf_j[imdex];
    cols = rbuf_j[imdex] + rend-rstart + 1;
    vals = rbuf_a[imdex];
    for (i=0; i<rend-rstart; i++) {
      row   = i + rstart;
      ncols = rptr[i+1] - rptr[i];
      ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      vals += ncols;
      cols += ncols;
    }
  }
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INITIAL_MATRIX) {
    *matredundant = C;
    /* create a supporting struct and attach it to C for reuse */
    ierr = PetscNewLog(C,&redund);CHKERRQ(ierr);
    if (subsize == 1) {
      Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
      c->redundant = redund;
    } else {
      Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
      c->redundant = redund;
    }

    redund->nzlocal   = nzlocal;
    redund->nsends    = nsends;
    redund->nrecvs    = nrecvs;
    redund->send_rank = send_rank;
    redund->recv_rank = recv_rank;
    redund->sbuf_nz   = sbuf_nz;
    redund->rbuf_nz   = rbuf_nz;
    redund->sbuf_j    = sbuf_j;
    redund->sbuf_a    = sbuf_a;
    redund->rbuf_j    = rbuf_j;
    redund->rbuf_a    = rbuf_a;
    redund->psubcomm  = NULL;

    redund->Destroy = C->ops->destroy;
    C->ops->destroy = MatDestroy_MatRedundant;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
{
  PetscErrorCode ierr;
  MPI_Comm       comm;
  PetscMPIInt    size,subsize;
  PetscInt       mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N;
  Mat_Redundant  *redund=NULL;
  PetscSubcomm   psubcomm=NULL;
  MPI_Comm       subcomm_in=subcomm;
  Mat            *matseq;
  IS             isrow,iscol;

  PetscFunctionBegin;
  if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */
    if (reuse == MAT_INITIAL_MATRIX) {
      /* create psubcomm, then get subcomm */
      ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
      ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
      if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must be between 1 and %D",size);

      ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
      ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
      ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
      ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
      subcomm = psubcomm->comm;
    } else { /* retrieve psubcomm and subcomm */
      ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr);
      ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
      if (subsize == 1) {
        Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
        redund = c->redundant;
      } else {
        Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
        redund = c->redundant;
      }
      psubcomm = redund->psubcomm;
    }
    if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
      ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr);
      if (reuse == MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_MatRedundant() */
        ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
        if (subsize == 1) {
          Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
          c->redundant->psubcomm = psubcomm;
        } else {
          Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
          c->redundant->psubcomm = psubcomm;
        }
      }
      PetscFunctionReturn(0);
    }
  }

  /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */
  ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    /* create a local sequential matrix matseq[0] */
    mloc_sub = PETSC_DECIDE;
    ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr);
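    /* prefix-sum the local sizes over the subcommunicator: after the scan, rend is
       the end of this process's ownership range and rstart = rend - mloc_sub */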
    ierr   = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
    rstart = rend - mloc_sub;
    ierr   = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
    ierr   = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
  } else { /* reuse == MAT_REUSE_MATRIX */
    if (subsize == 1) {
      Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
      redund = c->redundant;
    } else {
      Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
      redund = c->redundant;
    }

    isrow  = redund->isrow;
    iscol  = redund->iscol;
    matseq = redund->matseq;
  }
  ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
  ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);

  if (reuse == MAT_INITIAL_MATRIX) {
    /* create a supporting struct and attach it to C for reuse */
    ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr);
    if (subsize == 1) {
      Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
      c->redundant = redund;
    } else {
      Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
      c->redundant = redund;
    }
    redund->isrow    = isrow;
    redund->iscol    = iscol;
    redund->matseq   = matseq;
    redund->psubcomm = psubcomm;
    redund->Destroy  = (*matredundant)->ops->destroy;
    (*matredundant)->ops->destroy = MatDestroy_MatRedundant;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
{ 2928 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2929 va[i] = vb[i]; 2930 if (idx) idx[i] = a->garray[idxb[i]]; 2931 } 2932 } 2933 2934 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2935 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2936 ierr = PetscFree(idxb);CHKERRQ(ierr); 2937 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2938 PetscFunctionReturn(0); 2939 } 2940 2941 #undef __FUNCT__ 2942 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2943 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2944 { 2945 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2946 PetscInt n = A->rmap->n; 2947 PetscInt cstart = A->cmap->rstart; 2948 PetscInt *cmap = mat->garray; 2949 PetscInt *diagIdx, *offdiagIdx; 2950 Vec diagV, offdiagV; 2951 PetscScalar *a, *diagA, *offdiagA; 2952 PetscInt r; 2953 PetscErrorCode ierr; 2954 2955 PetscFunctionBegin; 2956 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2957 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2958 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2959 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2960 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2961 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2962 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2963 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2964 for (r = 0; r < n; ++r) { 2965 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2966 a[r] = diagA[r]; 2967 idx[r] = cstart + diagIdx[r]; 2968 } else { 2969 a[r] = offdiagA[r]; 2970 idx[r] = cmap[offdiagIdx[r]]; 2971 } 2972 } 2973 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2974 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2975 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2976 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2977 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2978 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2979 PetscFunctionReturn(0); 2980 } 2981 2982 #undef __FUNCT__ 2983 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2984 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2985 { 2986 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2987 PetscInt n = A->rmap->n; 2988 PetscInt cstart = A->cmap->rstart; 2989 PetscInt *cmap = mat->garray; 2990 PetscInt *diagIdx, *offdiagIdx; 2991 Vec diagV, offdiagV; 2992 PetscScalar *a, *diagA, *offdiagA; 2993 PetscInt r; 2994 PetscErrorCode ierr; 2995 2996 PetscFunctionBegin; 2997 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2998 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2999 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 3000 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3001 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3002 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3003 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3004 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3005 for (r = 0; r < n; ++r) { 3006 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 3007 a[r] = diagA[r]; 3008 idx[r] = cstart + diagIdx[r]; 3009 } else { 3010 a[r] = offdiagA[r]; 3011 idx[r] = cmap[offdiagIdx[r]]; 3012 } 3013 } 3014 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3015 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3016 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3017 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3018 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3019 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3020 
PetscFunctionReturn(0); 3021 } 3022 3023 #undef __FUNCT__ 3024 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3025 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3026 { 3027 PetscErrorCode ierr; 3028 Mat *dummy; 3029 3030 PetscFunctionBegin; 3031 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3032 *newmat = *dummy; 3033 ierr = PetscFree(dummy);CHKERRQ(ierr); 3034 PetscFunctionReturn(0); 3035 } 3036 3037 #undef __FUNCT__ 3038 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3039 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3040 { 3041 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3042 PetscErrorCode ierr; 3043 3044 PetscFunctionBegin; 3045 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3046 PetscFunctionReturn(0); 3047 } 3048 3049 #undef __FUNCT__ 3050 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3051 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3052 { 3053 PetscErrorCode ierr; 3054 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3055 3056 PetscFunctionBegin; 3057 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3058 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3059 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3060 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3061 PetscFunctionReturn(0); 3062 } 3063 3064 /* -------------------------------------------------------------------*/ 3065 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3066 MatGetRow_MPIAIJ, 3067 MatRestoreRow_MPIAIJ, 3068 MatMult_MPIAIJ, 3069 /* 4*/ MatMultAdd_MPIAIJ, 3070 MatMultTranspose_MPIAIJ, 3071 MatMultTransposeAdd_MPIAIJ, 3072 #if defined(PETSC_HAVE_PBGL) 3073 MatSolve_MPIAIJ, 3074 #else 3075 0, 3076 #endif 3077 0, 3078 0, 3079 /*10*/ 0, 3080 0, 3081 0, 3082 MatSOR_MPIAIJ, 3083 MatTranspose_MPIAIJ, 3084 /*15*/ MatGetInfo_MPIAIJ, 3085 MatEqual_MPIAIJ, 3086 MatGetDiagonal_MPIAIJ, 3087 MatDiagonalScale_MPIAIJ, 3088 MatNorm_MPIAIJ, 3089 /*20*/ MatAssemblyBegin_MPIAIJ, 3090 MatAssemblyEnd_MPIAIJ, 3091 MatSetOption_MPIAIJ, 3092 MatZeroEntries_MPIAIJ, 3093 /*24*/ MatZeroRows_MPIAIJ, 3094 0, 3095 #if defined(PETSC_HAVE_PBGL) 3096 0, 3097 #else 3098 0, 3099 #endif 3100 0, 3101 0, 3102 /*29*/ MatSetUp_MPIAIJ, 3103 #if defined(PETSC_HAVE_PBGL) 3104 0, 3105 #else 3106 0, 3107 #endif 3108 0, 3109 0, 3110 0, 3111 /*34*/ MatDuplicate_MPIAIJ, 3112 0, 3113 0, 3114 0, 3115 0, 3116 /*39*/ MatAXPY_MPIAIJ, 3117 MatGetSubMatrices_MPIAIJ, 3118 MatIncreaseOverlap_MPIAIJ, 3119 MatGetValues_MPIAIJ, 3120 MatCopy_MPIAIJ, 3121 /*44*/ MatGetRowMax_MPIAIJ, 3122 MatScale_MPIAIJ, 3123 0, 3124 0, 3125 MatZeroRowsColumns_MPIAIJ, 3126 /*49*/ MatSetRandom_MPIAIJ, 3127 0, 3128 0, 3129 0, 3130 0, 3131 /*54*/ MatFDColoringCreate_MPIXAIJ, 3132 0, 3133 MatSetUnfactored_MPIAIJ, 3134 MatPermute_MPIAIJ, 3135 0, 3136 /*59*/ MatGetSubMatrix_MPIAIJ, 3137 MatDestroy_MPIAIJ, 3138 MatView_MPIAIJ, 3139 0, 3140 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3141 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3142 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3143 0, 3144 0, 3145 0, 3146 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3147 MatGetRowMinAbs_MPIAIJ, 3148 0, 3149 MatSetColoring_MPIAIJ, 3150 0, 3151 MatSetValuesAdifor_MPIAIJ, 3152 /*75*/ MatFDColoringApply_AIJ, 3153 0, 3154 0, 3155 0, 3156 MatFindZeroDiagonals_MPIAIJ, 3157 /*80*/ 0, 3158 0, 3159 0, 3160 /*83*/ MatLoad_MPIAIJ, 3161 0, 3162 0, 3163 0, 3164 0, 3165 0, 3166 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3167 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3168 
MatMatMultNumeric_MPIAIJ_MPIAIJ, 3169 MatPtAP_MPIAIJ_MPIAIJ, 3170 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3171 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3172 0, 3173 0, 3174 0, 3175 0, 3176 /*99*/ 0, 3177 0, 3178 0, 3179 MatConjugate_MPIAIJ, 3180 0, 3181 /*104*/MatSetValuesRow_MPIAIJ, 3182 MatRealPart_MPIAIJ, 3183 MatImaginaryPart_MPIAIJ, 3184 0, 3185 0, 3186 /*109*/0, 3187 MatGetRedundantMatrix_MPIAIJ, 3188 MatGetRowMin_MPIAIJ, 3189 0, 3190 0, 3191 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3192 0, 3193 0, 3194 0, 3195 0, 3196 /*119*/0, 3197 0, 3198 0, 3199 0, 3200 MatGetMultiProcBlock_MPIAIJ, 3201 /*124*/MatFindNonzeroRows_MPIAIJ, 3202 MatGetColumnNorms_MPIAIJ, 3203 MatInvertBlockDiagonal_MPIAIJ, 3204 0, 3205 MatGetSubMatricesParallel_MPIAIJ, 3206 /*129*/0, 3207 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3208 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3209 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3210 0, 3211 /*134*/0, 3212 0, 3213 0, 3214 0, 3215 0, 3216 /*139*/0, 3217 0, 3218 0, 3219 MatFDColoringSetUp_MPIXAIJ 3220 }; 3221 3222 /* ----------------------------------------------------------------------------------------*/ 3223 3224 #undef __FUNCT__ 3225 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3226 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3227 { 3228 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3229 PetscErrorCode ierr; 3230 3231 PetscFunctionBegin; 3232 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3233 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3234 PetscFunctionReturn(0); 3235 } 3236 3237 #undef __FUNCT__ 3238 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3239 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3240 { 3241 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3242 PetscErrorCode ierr; 3243 3244 PetscFunctionBegin; 3245 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3246 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3247 PetscFunctionReturn(0); 3248 } 3249 3250 #undef __FUNCT__ 3251 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3252 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3253 { 3254 Mat_MPIAIJ *b; 3255 PetscErrorCode ierr; 3256 3257 PetscFunctionBegin; 3258 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3259 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3260 b = (Mat_MPIAIJ*)B->data; 3261 3262 if (!B->preallocated) { 3263 /* Explicitly create 2 MATSEQAIJ matrices. 
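       b->A holds the diagonal block (local rows by local columns); b->B holds
       the off-diagonal block, created below with the full global column width
       and later compacted to just its nonzero columns (recorded in b->garray)
       during assembly.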
*/ 3264 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3265 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3266 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 3267 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3268 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 3269 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3270 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3271 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 3272 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3273 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 3274 } 3275 3276 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3277 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3278 B->preallocated = PETSC_TRUE; 3279 PetscFunctionReturn(0); 3280 } 3281 3282 #undef __FUNCT__ 3283 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3284 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3285 { 3286 Mat mat; 3287 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3288 PetscErrorCode ierr; 3289 3290 PetscFunctionBegin; 3291 *newmat = 0; 3292 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3293 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3294 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 3295 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3296 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3297 a = (Mat_MPIAIJ*)mat->data; 3298 3299 mat->factortype = matin->factortype; 3300 mat->assembled = PETSC_TRUE; 3301 mat->insertmode = NOT_SET_VALUES; 3302 mat->preallocated = PETSC_TRUE; 3303 3304 a->size = oldmat->size; 3305 a->rank = oldmat->rank; 3306 a->donotstash = oldmat->donotstash; 3307 a->roworiented = oldmat->roworiented; 3308 a->rowindices = 0; 3309 a->rowvalues = 0; 3310 a->getrowactive = PETSC_FALSE; 3311 3312 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3313 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3314 3315 if (oldmat->colmap) { 3316 #if defined(PETSC_USE_CTABLE) 3317 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3318 #else 3319 ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr); 3320 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3321 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3322 #endif 3323 } else a->colmap = 0; 3324 if (oldmat->garray) { 3325 PetscInt len; 3326 len = oldmat->B->cmap->n; 3327 ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr); 3328 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3329 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3330 } else a->garray = 0; 3331 3332 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3333 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3334 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3335 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3336 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3337 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3338 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3339 ierr = 
PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3340 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3341 *newmat = mat; 3342 PetscFunctionReturn(0); 3343 } 3344 3345 3346 3347 #undef __FUNCT__ 3348 #define __FUNCT__ "MatLoad_MPIAIJ" 3349 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3350 { 3351 PetscScalar *vals,*svals; 3352 MPI_Comm comm; 3353 PetscErrorCode ierr; 3354 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3355 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3356 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3357 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3358 PetscInt cend,cstart,n,*rowners,sizesset=1; 3359 int fd; 3360 PetscInt bs = 1; 3361 3362 PetscFunctionBegin; 3363 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3364 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3365 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3366 if (!rank) { 3367 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3368 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3369 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3370 } 3371 3372 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3373 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3374 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3375 3376 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3377 3378 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3379 M = header[1]; N = header[2]; 3380 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3381 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3382 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3383 3384 /* If global sizes are set, check if they are consistent with that given in the file */ 3385 if (sizesset) { 3386 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3387 } 3388 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3389 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3390 3391 /* determine ownership of all (block) rows */ 3392 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3393 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3394 else m = newMat->rmap->n; /* Set by user */ 3395 3396 ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 3397 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3398 3399 /* First process needs enough room for process with most rows */ 3400 if (!rank) { 3401 mmax = rowners[1]; 3402 for (i=2; i<=size; i++) { 3403 mmax = PetscMax(mmax, rowners[i]); 3404 } 3405 } else mmax = -1; /* unused, but compilers complain */ 3406 3407 rowners[0] = 0; 3408 for (i=2; i<=size; i++) { 3409 rowners[i] += rowners[i-1]; 3410 } 3411 rstart = rowners[rank]; 3412 rend = rowners[rank+1]; 3413 3414 /* distribute row lengths to all processors */ 3415 ierr = 
PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 3416 if (!rank) { 3417 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3418 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3419 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3420 for (j=0; j<m; j++) { 3421 procsnz[0] += ourlens[j]; 3422 } 3423 for (i=1; i<size; i++) { 3424 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3425 /* calculate the number of nonzeros on each processor */ 3426 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3427 procsnz[i] += rowlengths[j]; 3428 } 3429 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3430 } 3431 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3432 } else { 3433 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3434 } 3435 3436 if (!rank) { 3437 /* determine max buffer needed and allocate it */ 3438 maxnz = 0; 3439 for (i=0; i<size; i++) { 3440 maxnz = PetscMax(maxnz,procsnz[i]); 3441 } 3442 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3443 3444 /* read in my part of the matrix column indices */ 3445 nz = procsnz[0]; 3446 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3447 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3448 3449 /* read in every one elses and ship off */ 3450 for (i=1; i<size; i++) { 3451 nz = procsnz[i]; 3452 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3453 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3454 } 3455 ierr = PetscFree(cols);CHKERRQ(ierr); 3456 } else { 3457 /* determine buffer space needed for message */ 3458 nz = 0; 3459 for (i=0; i<m; i++) { 3460 nz += ourlens[i]; 3461 } 3462 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3463 3464 /* receive message of column indices*/ 3465 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3466 } 3467 3468 /* determine column ownership if matrix is not square */ 3469 if (N != M) { 3470 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3471 else n = newMat->cmap->n; 3472 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3473 cstart = cend - n; 3474 } else { 3475 cstart = rstart; 3476 cend = rend; 3477 n = cend - cstart; 3478 } 3479 3480 /* loop over local rows, determining number of off diagonal entries */ 3481 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3482 jj = 0; 3483 for (i=0; i<m; i++) { 3484 for (j=0; j<ourlens[i]; j++) { 3485 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3486 jj++; 3487 } 3488 } 3489 3490 for (i=0; i<m; i++) { 3491 ourlens[i] -= offlens[i]; 3492 } 3493 if (!sizesset) { 3494 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3495 } 3496 3497 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3498 3499 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3500 3501 for (i=0; i<m; i++) { 3502 ourlens[i] += offlens[i]; 3503 } 3504 3505 if (!rank) { 3506 ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr); 3507 3508 /* read in my part of the matrix numerical values */ 3509 nz = procsnz[0]; 3510 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3511 3512 /* insert into matrix */ 3513 jj = rstart; 3514 smycols = mycols; 3515 svals = vals; 3516 for (i=0; i<m; i++) { 3517 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3518 smycols += ourlens[i]; 3519 svals += ourlens[i]; 3520 jj++; 3521 } 3522 3523 /* read in other processors and ship out */ 3524 for (i=1; i<size; i++) { 3525 nz = 
procsnz[i]; 3526 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3527 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3528 } 3529 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3530 } else { 3531 /* receive numeric values */ 3532 ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr); 3533 3534 /* receive message of values*/ 3535 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3536 3537 /* insert into matrix */ 3538 jj = rstart; 3539 smycols = mycols; 3540 svals = vals; 3541 for (i=0; i<m; i++) { 3542 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3543 smycols += ourlens[i]; 3544 svals += ourlens[i]; 3545 jj++; 3546 } 3547 } 3548 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3549 ierr = PetscFree(vals);CHKERRQ(ierr); 3550 ierr = PetscFree(mycols);CHKERRQ(ierr); 3551 ierr = PetscFree(rowners);CHKERRQ(ierr); 3552 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3553 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3554 PetscFunctionReturn(0); 3555 } 3556 3557 #undef __FUNCT__ 3558 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3559 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3560 { 3561 PetscErrorCode ierr; 3562 IS iscol_local; 3563 PetscInt csize; 3564 3565 PetscFunctionBegin; 3566 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3567 if (call == MAT_REUSE_MATRIX) { 3568 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3569 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3570 } else { 3571 PetscInt cbs; 3572 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3573 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3574 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3575 } 3576 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3577 if (call == MAT_INITIAL_MATRIX) { 3578 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3579 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3580 } 3581 PetscFunctionReturn(0); 3582 } 3583 3584 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3585 #undef __FUNCT__ 3586 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3587 /* 3588 Not great since it makes two copies of the submatrix, first an SeqAIJ 3589 in local and then by concatenating the local matrices the end result. 3590 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3591 3592 Note: This requires a sequential iscol with all indices. 
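   The reuse pattern handled by MatGetSubMatrix_MPIAIJ() above is, as a
   sketch (error checking omitted):

      Mat sub;
      MatGetSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&sub);   the first call gathers iscol and caches it on sub
      ...
      MatGetSubMatrix(mat,isrow,iscol,MAT_REUSE_MATRIX,&sub);     later calls look the cached data up again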
3593 */ 3594 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3595 { 3596 PetscErrorCode ierr; 3597 PetscMPIInt rank,size; 3598 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3599 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3600 PetscBool allcolumns, colflag; 3601 Mat M,Mreuse; 3602 MatScalar *vwork,*aa; 3603 MPI_Comm comm; 3604 Mat_SeqAIJ *aij; 3605 3606 PetscFunctionBegin; 3607 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3608 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3609 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3610 3611 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3612 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3613 if (colflag && ncol == mat->cmap->N) { 3614 allcolumns = PETSC_TRUE; 3615 } else { 3616 allcolumns = PETSC_FALSE; 3617 } 3618 if (call == MAT_REUSE_MATRIX) { 3619 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3620 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3621 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3622 } else { 3623 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3624 } 3625 3626 /* 3627 m - number of local rows 3628 n - number of columns (same on all processors) 3629 rstart - first row in new global matrix generated 3630 */ 3631 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3632 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3633 if (call == MAT_INITIAL_MATRIX) { 3634 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3635 ii = aij->i; 3636 jj = aij->j; 3637 3638 /* 3639 Determine the number of non-zeros in the diagonal and off-diagonal 3640 portions of the matrix in order to do correct preallocation 3641 */ 3642 3643 /* first get start and end of "diagonal" columns */ 3644 if (csize == PETSC_DECIDE) { 3645 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3646 if (mglobal == n) { /* square matrix */ 3647 nlocal = m; 3648 } else { 3649 nlocal = n/size + ((n % size) > rank); 3650 } 3651 } else { 3652 nlocal = csize; 3653 } 3654 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3655 rstart = rend - nlocal; 3656 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3657 3658 /* next, compute all the lengths */ 3659 ierr = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr); 3660 olens = dlens + m; 3661 for (i=0; i<m; i++) { 3662 jend = ii[i+1] - ii[i]; 3663 olen = 0; 3664 dlen = 0; 3665 for (j=0; j<jend; j++) { 3666 if (*jj < rstart || *jj >= rend) olen++; 3667 else dlen++; 3668 jj++; 3669 } 3670 olens[i] = olen; 3671 dlens[i] = dlen; 3672 } 3673 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3674 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3675 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3676 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3677 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3678 ierr = PetscFree(dlens);CHKERRQ(ierr); 3679 } else { 3680 PetscInt ml,nl; 3681 3682 M = *newmat; 3683 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3684 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3685 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3686 /* 3687 The next 
two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3688    rather than the slower MatSetValues().
3689 */
3690     M->was_assembled = PETSC_TRUE;
3691     M->assembled     = PETSC_FALSE;
3692   }
3693   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3694   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3695   ii   = aij->i;
3696   jj   = aij->j;
3697   aa   = aij->a;
3698   for (i=0; i<m; i++) {
3699     row   = rstart + i;
3700     nz    = ii[i+1] - ii[i];
3701     cwork = jj; jj += nz;
3702     vwork = aa; aa += nz;
3703     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3704   }
3705
3706   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3707   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3708   *newmat = M;
3709
3710   /* save submatrix used in processor for next request */
3711   if (call == MAT_INITIAL_MATRIX) {
3712     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3713     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3714   }
3715   PetscFunctionReturn(0);
3716 }
3717
3718 #undef __FUNCT__
3719 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3720 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3721 {
3722   PetscInt       m,cstart,cend,j,nnz,i,d;
3723   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3724   const PetscInt *JJ;
3725   PetscScalar    *values;
3726   PetscErrorCode ierr;
3727
3728   PetscFunctionBegin;
3729   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3730
3731   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3732   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3733   m      = B->rmap->n;
3734   cstart = B->cmap->rstart;
3735   cend   = B->cmap->rend;
3736   rstart = B->rmap->rstart;
3737
3738   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3739
3740 #if defined(PETSC_USE_DEBUG)
3741   for (i=0; i<m; i++) {
3742     nnz = Ii[i+1] - Ii[i];
3743     JJ  = J + Ii[i];
3744     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3745     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3746     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3747   }
3748 #endif
3749
3750   for (i=0; i<m; i++) {
3751     nnz     = Ii[i+1] - Ii[i];
3752     JJ      = J + Ii[i];
3753     nnz_max = PetscMax(nnz_max,nnz);
3754     d       = 0;
3755     for (j=0; j<nnz; j++) {
3756       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3757     }
3758     d_nnz[i] = d;
3759     o_nnz[i] = nnz - d;
3760   }
3761   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3762   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3763
3764   if (v) values = (PetscScalar*)v;
3765   else {
3766     ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
3767   }
3768
3769   for (i=0; i<m; i++) {
3770     ii   = i + rstart;
3771     nnz  = Ii[i+1] - Ii[i];
3772     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3773   }
3774   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3775   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3776
3777   if (!v) {
3778     ierr = PetscFree(values);CHKERRQ(ierr);
3779   }
3780   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3781   PetscFunctionReturn(0);
3782 }
3783
3784 #undef __FUNCT__
3785 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3786 /*@
3787    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3788    (the default parallel PETSc format).
3789
3790    Collective on MPI_Comm
3791
3792    Input Parameters:
3793 +  B - the matrix
3794 .  i - the indices into j for the start of each local row (starts with zero)
3795 .  j - the column indices for each local row (starts with zero)
3796 -  v - optional values in the matrix
3797
3798    Level: developer
3799
3800    Notes:
3801    The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3802    thus you CANNOT change the matrix entries by changing the values of v[] after you have
3803    called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3804
3805    The i and j indices are 0 based, and the i indices are offsets into the local j array.
3806
3807    The format used for the sparse matrix input is equivalent to a
3808    row-major ordering, i.e., for the following matrix, the input data expected is
3809    as shown:
3810
3811         1 0 0
3812         2 0 3     P0
3813        -------
3814         4 5 6     P1
3815
3816      Process0 [P0]: rows_owned=[0,1]
3817         i =  {0,1,3}  [size = nrow+1 = 2+1]
3818         j =  {0,0,2}  [size = nz = 3]
3819         v =  {1,2,3}  [size = nz = 3]
3820
3821      Process1 [P1]: rows_owned=[2]
3822         i =  {0,3}    [size = nrow+1 = 1+1]
3823         j =  {0,1,2}  [size = nz = 3]
3824         v =  {4,5,6}  [size = nz = 3]
3825
3826 .keywords: matrix, aij, compressed row, sparse, parallel
3827
3828 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3829           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3830 @*/
3831 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3832 {
3833   PetscErrorCode ierr;
3834
3835   PetscFunctionBegin;
3836   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3837   PetscFunctionReturn(0);
3838 }
3839
3840 #undef __FUNCT__
3841 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3842 /*@C
3843    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3844    (the default parallel PETSc format).  For good matrix assembly performance
3845    the user should preallocate the matrix storage by setting the parameters
3846    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3847    performance can be increased by more than a factor of 50.
3848
3849    Collective on MPI_Comm
3850
3851    Input Parameters:
3852 +  A - the matrix
3853 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3854            (same value is used for all local rows)
3855 .  d_nnz - array containing the number of nonzeros in the various rows of the
3856            DIAGONAL portion of the local submatrix (possibly different for each row)
3857            or NULL, if d_nz is used to specify the nonzero structure.
3858            The size of this array is equal to the number of local rows, i.e., 'm'.
3859            For matrices that will be factored, you must leave room for (and set)
3860            the diagonal entry even if it is zero.
3861 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3862            submatrix (same value is used for all local rows).
3863 -  o_nnz - array containing the number of nonzeros in the various rows of the
3864            OFF-DIAGONAL portion of the local submatrix (possibly different for
3865            each row) or NULL, if o_nz is used to specify the nonzero
3866            structure. The size of this array is equal to the number
3867            of local rows, i.e., 'm'.
3868
3869    If the *_nnz parameter is given then the *_nz parameter is ignored.
3870
3871    The AIJ format (also called the Yale sparse matrix format or
3872    compressed row storage (CSR)) is fully compatible with standard Fortran 77
3873    storage.  The stored row and column indices begin with zero.
3874    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3875
3876    The parallel matrix is partitioned such that the first m0 rows belong to
3877    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3878    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3879
3880    The DIAGONAL portion of the local submatrix of a processor can be defined
3881    as the submatrix which is obtained by extracting the part corresponding to
3882    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3883    first row that belongs to the processor, r2 is the last row belonging to
3884    this processor, and c1-c2 is the range of indices of the local part of a
3885    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3886    common case of a square matrix, the row and column ranges are the same and
3887    the DIAGONAL part is also square.  The remaining portion of the local
3888    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3889
3890    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
3891
3892    You can call MatGetInfo() to get information on how effective the preallocation was;
3893    for example the fields mallocs, nz_allocated, nz_used, nz_unneeded.
3894    You can also run with the option -info and look for messages with the string
3895    malloc in them to see if additional memory allocation was needed.
3896
3897    Example usage:
3898
3899    Consider the following 8x8 matrix with 34 nonzero values, that is
3900    assembled across 3 processors.  Let's assume that proc0 owns 3 rows,
3901    proc1 owns 3 rows, and proc2 owns 2 rows.  This division can be shown
3902    as follows:
3903
3904 .vb
3905             1  2  0  |  0  3  0  |  0  4
3906     Proc0   0  5  6  |  7  0  0  |  8  0
3907             9  0 10  | 11  0  0  | 12  0
3908     -------------------------------------
3909            13  0 14  | 15 16 17  |  0  0
3910     Proc1   0 18  0  | 19 20 21  |  0  0
3911             0  0  0  | 22 23  0  | 24  0
3912     -------------------------------------
3913     Proc2  25 26 27  |  0  0 28  | 29  0
3914            30  0  0  | 31 32 33  |  0 34
3915 .ve
3916
3917    This can be represented as a collection of submatrices as:
3918
3919 .vb
3920       A B C
3921       D E F
3922       G H I
3923 .ve
3924
3925    Where the submatrices A,B,C are owned by proc0, D,E,F are
3926    owned by proc1, and G,H,I are owned by proc2.
3927
3928    The 'm' parameters for proc0, proc1, and proc2 are 3,3,2 respectively.
3929    The 'n' parameters for proc0, proc1, and proc2 are 3,3,2 respectively.
3930    The 'M','N' parameters are 8,8, and have the same values on all procs.
3931
3932    The DIAGONAL submatrices corresponding to proc0, proc1, and proc2 are
3933    submatrices [A], [E], [I] respectively.  The OFF-DIAGONAL submatrices
3934    corresponding to proc0, proc1, and proc2 are [BC], [DF], [GH] respectively.
3935    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
3936    part as SeqAIJ matrices; e.g., proc1 will store [E] as a SeqAIJ
3937    matrix, and [DF] as another SeqAIJ matrix.
3938
3939    When the d_nz and o_nz parameters are specified, d_nz storage elements are
3940    allocated for every row of the local DIAGONAL submatrix, and o_nz
3941    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3942    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3943    the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3944    In this case, the values of d_nz,o_nz are:
3945 .vb
3946      proc0 : d_nz = 2, o_nz = 2
3947      proc1 : d_nz = 3, o_nz = 2
3948      proc2 : d_nz = 1, o_nz = 4
3949 .ve
3950    We are allocating m*(d_nz+o_nz) storage locations for every proc.  This
3951    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
3952    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3953    34 values.
3954
3955    When the d_nnz and o_nnz parameters are specified, the storage is specified
3956    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3957    In the above case the values for d_nnz,o_nnz are:
3958 .vb
3959      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3960      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3961      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3962 .ve
3963    Here the space allocated is the sum of all the above values, i.e., 34, and
3964    hence the preallocation is perfect.
3965
3966    Level: intermediate
3967
3968 .keywords: matrix, aij, compressed row, sparse, parallel
3969
3970 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3971           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3972 @*/
3973 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3974 {
3975   PetscErrorCode ierr;
3976
3977   PetscFunctionBegin;
3978   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3979   PetscValidType(B,1);
3980   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3981   PetscFunctionReturn(0);
3982 }
3983
3984 #undef __FUNCT__
3985 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3986 /*@
3987    MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain
3988    the local rows in standard CSR format.
3989
3990    Collective on MPI_Comm
3991
3992    Input Parameters:
3993 +  comm - MPI communicator
3994 .  m - number of local rows (Cannot be PETSC_DECIDE)
3995 .  n - This value should be the same as the local size used in creating the
3996        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3997        calculated if N is given).  For square matrices n is almost always m.
3998 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
3999 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4000 .  i - row indices
4001 .  j - column indices
4002 -  a - matrix values
4003
4004    Output Parameter:
4005 .  mat - the matrix
4006
4007    Level: intermediate
4008
4009    Notes:
4010    The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4011    thus you CANNOT change the matrix entries by changing the values of a[] after you have
4012    called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
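   For example, the two-process layout shown below would be created by each
   process calling (a sketch; ii, jj, and aa are hypothetical names for the
   local CSR arrays it owns)
.vb
      Mat A;
      MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,ii,jj,aa,&A);
.ve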
4013
4014    The i and j indices are 0 based, and the i indices are offsets into the local j array.
4015
4016    The format used for the sparse matrix input is equivalent to a
4017    row-major ordering, i.e., for the following matrix, the input data expected is
4018    as shown:
4019
4020         1 0 0
4021         2 0 3     P0
4022        -------
4023         4 5 6     P1
4024
4025      Process0 [P0]: rows_owned=[0,1]
4026         i =  {0,1,3}  [size = nrow+1 = 2+1]
4027         j =  {0,0,2}  [size = nz = 3]
4028         v =  {1,2,3}  [size = nz = 3]
4029
4030      Process1 [P1]: rows_owned=[2]
4031         i =  {0,3}    [size = nrow+1 = 1+1]
4032         j =  {0,1,2}  [size = nz = 3]
4033         v =  {4,5,6}  [size = nz = 3]
4034
4035 .keywords: matrix, aij, compressed row, sparse, parallel
4036
4037 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4038           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4039 @*/
4040 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4041 {
4042   PetscErrorCode ierr;
4043
4044   PetscFunctionBegin;
4045   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4046   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4047   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4048   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4049   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4050   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4051   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4052   PetscFunctionReturn(0);
4053 }
4054
4055 #undef __FUNCT__
4056 #define __FUNCT__ "MatCreateAIJ"
4057 /*@C
4058    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4059    (the default parallel PETSc format).  For good matrix assembly performance
4060    the user should preallocate the matrix storage by setting the parameters
4061    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4062    performance can be increased by more than a factor of 50.
4063
4064    Collective on MPI_Comm
4065
4066    Input Parameters:
4067 +  comm - MPI communicator
4068 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
4069        This value should be the same as the local size used in creating the
4070        y vector for the matrix-vector product y = Ax.
4071 .  n - This value should be the same as the local size used in creating the
4072        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4073        calculated if N is given).  For square matrices n is almost always m.
4074 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4075 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4076 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4077            (same value is used for all local rows)
4078 .  d_nnz - array containing the number of nonzeros in the various rows of the
4079            DIAGONAL portion of the local submatrix (possibly different for each row)
4080            or NULL, if d_nz is used to specify the nonzero structure.
4081            The size of this array is equal to the number of local rows, i.e., 'm'.
4082 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4083            submatrix (same value is used for all local rows).
4084 -  o_nnz - array containing the number of nonzeros in the various rows of the
4085            OFF-DIAGONAL portion of the local submatrix (possibly different for
4086            each row) or NULL, if o_nz is used to specify the nonzero
4087            structure. The size of this array is equal to the number
4088            of local rows, i.e., 'm'.
4089
4090    Output Parameter:
4091 .  A - the matrix
4092
4093    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4094    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4095    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4096
4097    Notes:
4098    If the *_nnz parameter is given then the *_nz parameter is ignored.
4099
4100    The m,n,M,N parameters specify the size of the matrix, and its partitioning across
4101    processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4102    storage requirements for this matrix.
4103
4104    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4105    processor then it must be used on all processors that share the object for
4106    that argument.
4107
4108    The user MUST specify either the local or global matrix dimensions
4109    (possibly both).
4110
4111    The parallel matrix is partitioned across processors such that the
4112    first m0 rows belong to process 0, the next m1 rows belong to
4113    process 1, the next m2 rows belong to process 2, etc., where
4114    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4115    values corresponding to an [m x N] submatrix.
4116
4117    The columns are logically partitioned with the n0 columns belonging
4118    to the 0th partition, the next n1 columns belonging to the next
4119    partition, etc., where n0,n1,n2... are the input parameter 'n'.
4120
4121    The DIAGONAL portion of the local submatrix on any given processor
4122    is the submatrix corresponding to the rows and columns m,n
4123    corresponding to the given processor, i.e., the diagonal matrix on
4124    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4125    etc.  The remaining portion of the local submatrix [m x (N-n)]
4126    constitutes the OFF-DIAGONAL portion.  The example below better
4127    illustrates this concept.
4128
4129    For a square global matrix we define each processor's diagonal portion
4130    to be its local rows and the corresponding columns (a square submatrix);
4131    each processor's off-diagonal portion encompasses the remainder of the
4132    local matrix (a rectangular submatrix).
4133
4134    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
4135
4136    When calling this routine with a single process communicator, a matrix of
4137    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4138    type of communicator, use the construction mechanism:
4139      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4140
4141    By default, this format uses inodes (identical nodes) when possible.
4142    We search for consecutive rows with the same nonzero structure, thereby
4143    reusing matrix information to achieve increased efficiency.
4144
4145    Options Database Keys:
4146 +  -mat_no_inode  - Do not use inodes
4147 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4148 -  -mat_aij_oneindex - Internally use indexing starting at 1
4149         rather than 0.  Note that when calling MatSetValues(),
4150         the user still MUST index entries starting at 0!
4151
4152
4153    Example usage:
4154
4155    Consider the following 8x8 matrix with 34 nonzero values, that is
4156    assembled across 3 processors.  Let's assume that proc0 owns 3 rows,
4157    proc1 owns 3 rows, and proc2 owns 2 rows.  This division can be shown
4158    as follows:
4159
4160 .vb
4161             1  2  0  |  0  3  0  |  0  4
4162     Proc0   0  5  6  |  7  0  0  |  8  0
4163             9  0 10  | 11  0  0  | 12  0
4164     -------------------------------------
4165            13  0 14  | 15 16 17  |  0  0
4166     Proc1   0 18  0  | 19 20 21  |  0  0
4167             0  0  0  | 22 23  0  | 24  0
4168     -------------------------------------
4169     Proc2  25 26 27  |  0  0 28  | 29  0
4170            30  0  0  | 31 32 33  |  0 34
4171 .ve
4172
4173    This can be represented as a collection of submatrices as:
4174
4175 .vb
4176       A B C
4177       D E F
4178       G H I
4179 .ve
4180
4181    Where the submatrices A,B,C are owned by proc0, D,E,F are
4182    owned by proc1, and G,H,I are owned by proc2.
4183
4184    The 'm' parameters for proc0, proc1, and proc2 are 3,3,2 respectively.
4185    The 'n' parameters for proc0, proc1, and proc2 are 3,3,2 respectively.
4186    The 'M','N' parameters are 8,8, and have the same values on all procs.
4187
4188    The DIAGONAL submatrices corresponding to proc0, proc1, and proc2 are
4189    submatrices [A], [E], [I] respectively.  The OFF-DIAGONAL submatrices
4190    corresponding to proc0, proc1, and proc2 are [BC], [DF], [GH] respectively.
4191    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4192    part as SeqAIJ matrices; e.g., proc1 will store [E] as a SeqAIJ
4193    matrix, and [DF] as another SeqAIJ matrix.
4194
4195    When the d_nz and o_nz parameters are specified, d_nz storage elements are
4196    allocated for every row of the local DIAGONAL submatrix, and o_nz
4197    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4198    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4199    the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4200    In this case, the values of d_nz,o_nz are:
4201 .vb
4202      proc0 : d_nz = 2, o_nz = 2
4203      proc1 : d_nz = 3, o_nz = 2
4204      proc2 : d_nz = 1, o_nz = 4
4205 .ve
4206    We are allocating m*(d_nz+o_nz) storage locations for every proc.  This
4207    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4208    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4209    34 values.
4210
4211    When the d_nnz and o_nnz parameters are specified, the storage is specified
4212    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4213    In the above case the values for d_nnz,o_nnz are:
4214 .vb
4215      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4216      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4217      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4218 .ve
4219    Here the space allocated is the sum of all the above values, i.e., 34, and
4220    hence the preallocation is perfect.
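   A corresponding call on each process (a sketch; m, n, d_nnz, and o_nnz take
   the per-process values listed above) would be
.vb
      Mat A;
      MatCreateAIJ(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,d_nnz,0,o_nnz,&A);
.ve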
4221 4222 Level: intermediate 4223 4224 .keywords: matrix, aij, compressed row, sparse, parallel 4225 4226 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4227 MPIAIJ, MatCreateMPIAIJWithArrays() 4228 @*/ 4229 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4230 { 4231 PetscErrorCode ierr; 4232 PetscMPIInt size; 4233 4234 PetscFunctionBegin; 4235 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4236 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4237 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4238 if (size > 1) { 4239 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4240 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4241 } else { 4242 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4243 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4244 } 4245 PetscFunctionReturn(0); 4246 } 4247 4248 #undef __FUNCT__ 4249 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4250 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4251 { 4252 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4253 4254 PetscFunctionBegin; 4255 *Ad = a->A; 4256 *Ao = a->B; 4257 *colmap = a->garray; 4258 PetscFunctionReturn(0); 4259 } 4260 4261 #undef __FUNCT__ 4262 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4263 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4264 { 4265 PetscErrorCode ierr; 4266 PetscInt i; 4267 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4268 4269 PetscFunctionBegin; 4270 if (coloring->ctype == IS_COLORING_GLOBAL) { 4271 ISColoringValue *allcolors,*colors; 4272 ISColoring ocoloring; 4273 4274 /* set coloring for diagonal portion */ 4275 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4276 4277 /* set coloring for off-diagonal portion */ 4278 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4279 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4280 for (i=0; i<a->B->cmap->n; i++) { 4281 colors[i] = allcolors[a->garray[i]]; 4282 } 4283 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4284 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4285 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4286 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4287 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4288 ISColoringValue *colors; 4289 PetscInt *larray; 4290 ISColoring ocoloring; 4291 4292 /* set coloring for diagonal portion */ 4293 ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr); 4294 for (i=0; i<a->A->cmap->n; i++) { 4295 larray[i] = i + A->cmap->rstart; 4296 } 4297 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4298 ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr); 4299 for (i=0; i<a->A->cmap->n; i++) { 4300 colors[i] = coloring->colors[larray[i]]; 4301 } 4302 ierr = PetscFree(larray);CHKERRQ(ierr); 4303 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4304 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4305 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4306 4307 /* set coloring for off-diagonal portion */ 4308 ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr); 4309 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4310 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4311 for (i=0; i<a->B->cmap->n; i++) { 4312 colors[i] = coloring->colors[larray[i]]; 4313 } 4314 ierr = PetscFree(larray);CHKERRQ(ierr); 4315 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4316 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4317 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4318 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4319 PetscFunctionReturn(0); 4320 } 4321 4322 #undef __FUNCT__ 4323 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4324 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4325 { 4326 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4327 PetscErrorCode ierr; 4328 4329 PetscFunctionBegin; 4330 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4331 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4332 PetscFunctionReturn(0); 4333 } 4334 4335 #undef __FUNCT__ 4336 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4337 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4338 { 4339 PetscErrorCode ierr; 4340 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4341 PetscInt *indx; 4342 4343 PetscFunctionBegin; 4344 /* This routine will ONLY return MPIAIJ type matrix */ 4345 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4346 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4347 if (n == PETSC_DECIDE) { 4348 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4349 } 4350 /* Check sum(n) = N */ 4351 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4352 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4353 4354 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4355 rstart -= m; 4356 4357 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4358 for (i=0; i<m; i++) { 4359 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4360 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4361 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4362 } 4363 4364 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4365 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4366 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4367 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4368 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4369 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4370 PetscFunctionReturn(0); 4371 } 4372 4373 #undef __FUNCT__ 4374 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4375 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4376 { 4377 PetscErrorCode ierr; 4378 PetscInt m,N,i,rstart,nnz,Ii; 4379 PetscInt *indx; 4380 PetscScalar *values; 4381 4382 PetscFunctionBegin; 4383 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4384 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4385 for (i=0; i<m; i++) { 4386 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4387 Ii = i + rstart; 4388 ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4389 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4390 } 4391 ierr = 
MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4392 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4393 PetscFunctionReturn(0); 4394 } 4395 4396 #undef __FUNCT__ 4397 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4398 /*@ 4399 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4400 matrices from each processor 4401 4402 Collective on MPI_Comm 4403 4404 Input Parameters: 4405 + comm - the communicators the parallel matrix will live on 4406 . inmat - the input sequential matrices 4407 . n - number of local columns (or PETSC_DECIDE) 4408 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4409 4410 Output Parameter: 4411 . outmat - the parallel matrix generated 4412 4413 Level: advanced 4414 4415 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4416 4417 @*/ 4418 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4419 { 4420 PetscErrorCode ierr; 4421 PetscMPIInt size; 4422 4423 PetscFunctionBegin; 4424 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4425 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4426 if (size == 1) { 4427 if (scall == MAT_INITIAL_MATRIX) { 4428 ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr); 4429 } else { 4430 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4431 } 4432 } else { 4433 if (scall == MAT_INITIAL_MATRIX) { 4434 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4435 } 4436 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4437 } 4438 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4439 PetscFunctionReturn(0); 4440 } 4441 4442 #undef __FUNCT__ 4443 #define __FUNCT__ "MatFileSplit" 4444 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4445 { 4446 PetscErrorCode ierr; 4447 PetscMPIInt rank; 4448 PetscInt m,N,i,rstart,nnz; 4449 size_t len; 4450 const PetscInt *indx; 4451 PetscViewer out; 4452 char *name; 4453 Mat B; 4454 const PetscScalar *values; 4455 4456 PetscFunctionBegin; 4457 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4458 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4459 /* Should this be the type of the diagonal block of A? 
*/ 4460 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4461 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4462 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4463 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4464 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4465 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4466 for (i=0; i<m; i++) { 4467 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4468 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4469 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4470 } 4471 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4472 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4473 4474 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4475 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4476 ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr); 4477 sprintf(name,"%s.%d",outfile,rank); 4478 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4479 ierr = PetscFree(name);CHKERRQ(ierr); 4480 ierr = MatView(B,out);CHKERRQ(ierr); 4481 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4482 ierr = MatDestroy(&B);CHKERRQ(ierr); 4483 PetscFunctionReturn(0); 4484 } 4485 4486 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4487 #undef __FUNCT__ 4488 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4489 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4490 { 4491 PetscErrorCode ierr; 4492 Mat_Merge_SeqsToMPI *merge; 4493 PetscContainer container; 4494 4495 PetscFunctionBegin; 4496 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4497 if (container) { 4498 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4499 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4500 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4501 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4502 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4503 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4504 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4505 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4506 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4507 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4508 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4509 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4510 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4511 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4512 ierr = PetscFree(merge);CHKERRQ(ierr); 4513 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4514 } 4515 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4516 PetscFunctionReturn(0); 4517 } 4518 4519 #include <../src/mat/utils/freespace.h> 4520 #include <petscbt.h> 4521 4522 #undef __FUNCT__ 4523 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4524 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4525 { 4526 PetscErrorCode ierr; 4527 MPI_Comm comm; 4528 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4529 PetscMPIInt size,rank,taga,*len_s; 4530 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4531 PetscInt proc,m; 4532 PetscInt **buf_ri,**buf_rj; 4533 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4534 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4535 MPI_Request *s_waits,*r_waits; 4536 MPI_Status *status; 4537 MatScalar *aa=a->a; 4538 MatScalar **abuf_r,*ba_i; 4539 Mat_Merge_SeqsToMPI *merge; 4540 PetscContainer container; 4541 4542 PetscFunctionBegin; 4543 ierr = 
PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k];                 /* beginning of k-th received i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;             /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
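/*
   A reuse sketch (not from the original source; it mirrors what MatCreateMPIAIJSumSeqAIJ()
   further below does for MAT_REUSE_MATRIX): the symbolic routine is run once, after which the
   numeric routine above can be repeated whenever the values, but not the nonzero pattern, of
   each rank's seqmat change.

     Mat mpimat;
     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,&mpimat);CHKERRQ(ierr);
     ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,mpimat);CHKERRQ(ierr);
     ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,mpimat);CHKERRQ(ierr);
*/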
extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode      ierr;
  Mat                 B_mpi;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt            len,proc,*dnz,*onz,bs,cbs;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space=NULL,current_space=NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);

  /* make sure it is a PETSc comm */
  ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscNew(&merge);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);

  /* determine row ownership */
  /*---------------------------------------------------------*/
  ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
  ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);

  m      = merge->rowmap->n;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len          = 0; /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++) {
    len_si[proc] = 0;
    if (proc == rank) {
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* number of nonzeros to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      for (i=owners[proc]; i<owners[proc+1]; i++) {
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len         += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
  ierr =
PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4721 4722 /* post the Isend of j-structure */ 4723 /*--------------------------------*/ 4724 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4725 4726 for (proc=0, k=0; proc<size; proc++) { 4727 if (!len_s[proc]) continue; 4728 i = owners[proc]; 4729 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4730 k++; 4731 } 4732 4733 /* receives and sends of j-structure are complete */ 4734 /*------------------------------------------------*/ 4735 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4736 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4737 4738 /* send and recv i-structure */ 4739 /*---------------------------*/ 4740 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4741 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4742 4743 ierr = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr); 4744 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4745 for (proc=0,k=0; proc<size; proc++) { 4746 if (!len_s[proc]) continue; 4747 /* form outgoing message for i-structure: 4748 buf_si[0]: nrows to be sent 4749 [1:nrows]: row index (global) 4750 [nrows+1:2*nrows+1]: i-structure index 4751 */ 4752 /*-------------------------------------------*/ 4753 nrows = len_si[proc]/2 - 1; 4754 buf_si_i = buf_si + nrows+1; 4755 buf_si[0] = nrows; 4756 buf_si_i[0] = 0; 4757 nrows = 0; 4758 for (i=owners[proc]; i<owners[proc+1]; i++) { 4759 anzi = ai[i+1] - ai[i]; 4760 if (anzi) { 4761 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4762 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4763 nrows++; 4764 } 4765 } 4766 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4767 k++; 4768 buf_si += len_si[proc]; 4769 } 4770 4771 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4772 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4773 4774 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4775 for (i=0; i<merge->nrecv; i++) { 4776 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4777 } 4778 4779 ierr = PetscFree(len_si);CHKERRQ(ierr); 4780 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4781 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4782 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4783 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4784 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4785 ierr = PetscFree(status);CHKERRQ(ierr); 4786 4787 /* compute a local seq matrix in each processor */ 4788 /*----------------------------------------------*/ 4789 /* allocate bi array and free space for accumulating nonzero column info */ 4790 ierr = PetscMalloc1((m+1),&bi);CHKERRQ(ierr); 4791 bi[0] = 0; 4792 4793 /* create and initialize a linked list */ 4794 nlnk = N+1; 4795 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4796 4797 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4798 len = ai[owners[rank+1]] - ai[owners[rank]]; 4799 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4800 4801 current_space = free_space; 4802 4803 /* determine symbolic info for each local row */ 4804 ierr = 
PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k];                 /* beginning of k-th received i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = NULL;
  merge->coj          = NULL;
  merge->owners_co    = NULL;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);

  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
/*@C
      MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding sequential
                 matrices from each processor

   Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix (one per process)
.    m - number of local rows (or PETSC_DECIDE)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
    } else {
      ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
    ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
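/*
   A minimal usage sketch (an assumption about typical callers, not code from this file):
   every rank holds an assembled SeqAIJ matrix seqmat of identical global dimensions, e.g. a
   locally computed contribution to a global sum, and the local sizes are left to PETSc.

     Mat mpimat;
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
*/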
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMat"
/*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize()

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()

@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a,*b;
  PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
  MatScalar      *aa,*ba,*cam;
  PetscScalar    *ca;
  PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  a  = (Mat_SeqAIJ*)(mpimat->A)->data;
  b  = (Mat_SeqAIJ*)(mpimat->B)->data;
  ai = a->i; aj = a->j; bi = b->i; bj = b->j;
  aa = a->a; ba = b->a;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr  = PetscMalloc1((1+am),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++) {
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr);
    k    = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    ci  = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
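/*
   A minimal usage sketch (assuming A is an assembled MPIAIJ matrix; the second call reuses
   the storage created by the first, which is valid while the nonzero pattern of A is
   unchanged):

     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/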
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
/*@C
     MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or NULL)

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()

@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row) {
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr   = PetscFree(aloc);CHKERRQ(ierr);
  if (!row) {
    ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
  }
  if (!col) {
    ierr = ISDestroy(&iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
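/*
   A minimal usage sketch (assuming A is an assembled MPIAIJ matrix; row and col are passed
   as NULL so the routine selects the local rows and the nonzero columns itself):

     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/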
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or NULL)

   Output Parameter:
+    rowb, colb - index sets of rows and columns of B to extract
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq=NULL;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb  = *rowb; iscolb = *colb;
    ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr   = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb) {
    ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5212 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5213 5214 Level: developer 5215 5216 */ 5217 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5218 { 5219 VecScatter_MPI_General *gen_to,*gen_from; 5220 PetscErrorCode ierr; 5221 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5222 Mat_SeqAIJ *b_oth; 5223 VecScatter ctx =a->Mvctx; 5224 MPI_Comm comm; 5225 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5226 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5227 PetscScalar *rvalues,*svalues; 5228 MatScalar *b_otha,*bufa,*bufA; 5229 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5230 MPI_Request *rwaits = NULL,*swaits = NULL; 5231 MPI_Status *sstatus,rstatus; 5232 PetscMPIInt jj; 5233 PetscInt *cols,sbs,rbs; 5234 PetscScalar *vals; 5235 5236 PetscFunctionBegin; 5237 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5238 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5239 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5240 } 5241 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5242 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5243 5244 gen_to = (VecScatter_MPI_General*)ctx->todata; 5245 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5246 rvalues = gen_from->values; /* holds the length of receiving row */ 5247 svalues = gen_to->values; /* holds the length of sending row */ 5248 nrecvs = gen_from->n; 5249 nsends = gen_to->n; 5250 5251 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5252 srow = gen_to->indices; /* local row index to be sent */ 5253 sstarts = gen_to->starts; 5254 sprocs = gen_to->procs; 5255 sstatus = gen_to->sstatus; 5256 sbs = gen_to->bs; 5257 rstarts = gen_from->starts; 5258 rprocs = gen_from->procs; 5259 rbs = gen_from->bs; 5260 5261 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5262 if (scall == MAT_INITIAL_MATRIX) { 5263 /* i-array */ 5264 /*---------*/ 5265 /* post receives */ 5266 for (i=0; i<nrecvs; i++) { 5267 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5268 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5269 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5270 } 5271 5272 /* pack the outgoing message */ 5273 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5274 5275 sstartsj[0] = 0; 5276 rstartsj[0] = 0; 5277 len = 0; /* total length of j or a array to be sent */ 5278 k = 0; 5279 for (i=0; i<nsends; i++) { 5280 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5281 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5282 for (j=0; j<nrows; j++) { 5283 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5284 for (l=0; l<sbs; l++) { 5285 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5286 5287 rowlen[j*sbs+l] = ncols; 5288 5289 len += ncols; 5290 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5291 } 5292 k++; 5293 } 5294 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5295 5296 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5297 } 5298 /* recvs and 
sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);

    b_othi[0] = 0;
    len       = 0; /* total length of j or a array to be received */
    k         = 0;
    for (i=0; i<nrecvs; i++) {
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len        += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++) {
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++) {
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++) {
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++) {
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX) {
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Invalid MatReuse");

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++) {
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++) {
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++) {
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++) {
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr =
MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5393 } 5394 /* recvs and sends of a-array are completed */ 5395 i = nrecvs; 5396 while (i--) { 5397 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5398 } 5399 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5400 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5401 5402 if (scall == MAT_INITIAL_MATRIX) { 5403 /* put together the new matrix */ 5404 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5405 5406 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5407 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5408 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5409 b_oth->free_a = PETSC_TRUE; 5410 b_oth->free_ij = PETSC_TRUE; 5411 b_oth->nonew = 0; 5412 5413 ierr = PetscFree(bufj);CHKERRQ(ierr); 5414 if (!startsj_s || !bufa_ptr) { 5415 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5416 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5417 } else { 5418 *startsj_s = sstartsj; 5419 *startsj_r = rstartsj; 5420 *bufa_ptr = bufa; 5421 } 5422 } 5423 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5424 PetscFunctionReturn(0); 5425 } 5426 5427 #undef __FUNCT__ 5428 #define __FUNCT__ "MatGetCommunicationStructs" 5429 /*@C 5430 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5431 5432 Not Collective 5433 5434 Input Parameters: 5435 . A - The matrix in mpiaij format 5436 5437 Output Parameter: 5438 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5439 . colmap - A map from global column index to local index into lvec 5440 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5441 5442 Level: developer 5443 5444 @*/ 5445 #if defined(PETSC_USE_CTABLE) 5446 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5447 #else 5448 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5449 #endif 5450 { 5451 Mat_MPIAIJ *a; 5452 5453 PetscFunctionBegin; 5454 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5455 PetscValidPointer(lvec, 2); 5456 PetscValidPointer(colmap, 3); 5457 PetscValidPointer(multScatter, 4); 5458 a = (Mat_MPIAIJ*) A->data; 5459 if (lvec) *lvec = a->lvec; 5460 if (colmap) *colmap = a->colmap; 5461 if (multScatter) *multScatter = a->Mvctx; 5462 PetscFunctionReturn(0); 5463 } 5464 5465 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5466 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5467 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5468 5469 #undef __FUNCT__ 5470 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5471 /* 5472 Computes (B'*A')' since computing B*A directly is untenable 5473 5474 n p p 5475 ( ) ( ) ( ) 5476 m ( A ) * n ( B ) = m ( C ) 5477 ( ) ( ) ( ) 5478 5479 */ 5480 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5481 { 5482 PetscErrorCode ierr; 5483 Mat At,Bt,Ct; 5484 5485 PetscFunctionBegin; 5486 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5487 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5488 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5489 ierr = MatDestroy(&At);CHKERRQ(ierr); 
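  /* Ct holds B'*A' = (A*B)'; transposing it back into the caller's C completes the product */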
5490 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5491 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5492 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5493 PetscFunctionReturn(0); 5494 } 5495 5496 #undef __FUNCT__ 5497 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5498 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5499 { 5500 PetscErrorCode ierr; 5501 PetscInt m=A->rmap->n,n=B->cmap->n; 5502 Mat Cmat; 5503 5504 PetscFunctionBegin; 5505 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5506 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5507 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5508 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5509 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5510 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5511 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5512 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5513 5514 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5515 5516 *C = Cmat; 5517 PetscFunctionReturn(0); 5518 } 5519 5520 /* ----------------------------------------------------------------*/ 5521 #undef __FUNCT__ 5522 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5523 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5524 { 5525 PetscErrorCode ierr; 5526 5527 PetscFunctionBegin; 5528 if (scall == MAT_INITIAL_MATRIX) { 5529 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5530 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5531 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5532 } 5533 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5534 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5535 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5536 PetscFunctionReturn(0); 5537 } 5538 5539 #if defined(PETSC_HAVE_MUMPS) 5540 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5541 #endif 5542 #if defined(PETSC_HAVE_PASTIX) 5543 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5544 #endif 5545 #if defined(PETSC_HAVE_SUPERLU_DIST) 5546 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5547 #endif 5548 #if defined(PETSC_HAVE_CLIQUE) 5549 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5550 #endif 5551 5552 /*MC 5553 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5554 5555 Options Database Keys: 5556 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5557 5558 Level: beginner 5559 5560 .seealso: MatCreateAIJ() 5561 M*/ 5562 5563 #undef __FUNCT__ 5564 #define __FUNCT__ "MatCreate_MPIAIJ" 5565 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5566 { 5567 Mat_MPIAIJ *b; 5568 PetscErrorCode ierr; 5569 PetscMPIInt size; 5570 5571 PetscFunctionBegin; 5572 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5573 5574 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5575 B->data = (void*)b; 5576 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5577 B->assembled = PETSC_FALSE; 5578 B->insertmode = NOT_SET_VALUES; 5579 b->size = size; 5580 5581 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5582 5583 /* build cache for off array entries formed */ 5584 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5585 5586 b->donotstash = PETSC_FALSE; 5587 b->colmap = 0; 5588 b->garray = 0; 5589 b->roworiented = PETSC_TRUE; 5590 5591 /* stuff used for matrix vector multiply */ 5592 b->lvec = NULL; 5593 b->Mvctx = NULL; 5594 5595 /* stuff for MatGetRow() */ 5596 b->rowindices = 0; 5597 b->rowvalues = 0; 5598 b->getrowactive = PETSC_FALSE; 5599 5600 /* flexible pointer used in CUSP/CUSPARSE classes */ 5601 b->spptr = NULL; 5602 5603 #if defined(PETSC_HAVE_MUMPS) 5604 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5605 #endif 5606 #if defined(PETSC_HAVE_PASTIX) 5607 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5608 #endif 5609 #if defined(PETSC_HAVE_SUPERLU_DIST) 5610 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5611 #endif 5612 #if defined(PETSC_HAVE_CLIQUE) 5613 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr); 5614 #endif 5615 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5616 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5617 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5618 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5619 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5620 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5621 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5622 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5623 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5624 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5625 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5626 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@
     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       it calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices for "diagonal" portion of matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.

       The i and j indices are 0 based

       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
       legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
       not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
       the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
       keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
       communication if it is known that only local entries will be set.
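       Example (a sketch only; it assumes the six CSR arrays have already been filled with the
       "diagonal"/"off-diagonal" split described above):
.vb
      Mat A;
      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
      ierr = MatDestroy(&A);CHKERRQ(ierr);
.ve
       after which the caller still owns, and must free, the six arrays.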
5676 5677 .keywords: matrix, aij, compressed row, sparse, parallel 5678 5679 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5680 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5681 @*/ 5682 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5683 { 5684 PetscErrorCode ierr; 5685 Mat_MPIAIJ *maij; 5686 5687 PetscFunctionBegin; 5688 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5689 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5690 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5691 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5692 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5693 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5694 maij = (Mat_MPIAIJ*) (*mat)->data; 5695 5696 (*mat)->preallocated = PETSC_TRUE; 5697 5698 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5699 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5700 5701 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5702 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5703 5704 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5705 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5706 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5707 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5708 5709 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5710 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5711 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5712 PetscFunctionReturn(0); 5713 } 5714 5715 /* 5716 Special version for direct calls from Fortran 5717 */ 5718 #include <petsc-private/fortranimpl.h> 5719 5720 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5721 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5722 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5723 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5724 #endif 5725 5726 /* Change these macros so can be used in void function */ 5727 #undef CHKERRQ 5728 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5729 #undef SETERRQ2 5730 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5731 #undef SETERRQ3 5732 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5733 #undef SETERRQ 5734 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5735 5736 #undef __FUNCT__ 5737 #define __FUNCT__ "matsetvaluesmpiaij_" 5738 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5739 { 5740 Mat mat = *mmat; 5741 PetscInt m = *mm, n = *mn; 5742 InsertMode addv = *maddv; 5743 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5744 PetscScalar value; 5745 PetscErrorCode ierr; 5746 5747 MatCheckPreallocated(mat,1); 5748 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5749 5750 #if defined(PETSC_USE_DEBUG) 5751 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5752 #endif 5753 { 5754 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5755 
PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5756 PetscBool roworiented = aij->roworiented; 5757 5758 /* Some Variables required in the macro */ 5759 Mat A = aij->A; 5760 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5761 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5762 MatScalar *aa = a->a; 5763 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5764 Mat B = aij->B; 5765 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5766 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5767 MatScalar *ba = b->a; 5768 5769 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5770 PetscInt nonew = a->nonew; 5771 MatScalar *ap1,*ap2; 5772 5773 PetscFunctionBegin; 5774 for (i=0; i<m; i++) { 5775 if (im[i] < 0) continue; 5776 #if defined(PETSC_USE_DEBUG) 5777 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5778 #endif 5779 if (im[i] >= rstart && im[i] < rend) { 5780 row = im[i] - rstart; 5781 lastcol1 = -1; 5782 rp1 = aj + ai[row]; 5783 ap1 = aa + ai[row]; 5784 rmax1 = aimax[row]; 5785 nrow1 = ailen[row]; 5786 low1 = 0; 5787 high1 = nrow1; 5788 lastcol2 = -1; 5789 rp2 = bj + bi[row]; 5790 ap2 = ba + bi[row]; 5791 rmax2 = bimax[row]; 5792 nrow2 = bilen[row]; 5793 low2 = 0; 5794 high2 = nrow2; 5795 5796 for (j=0; j<n; j++) { 5797 if (roworiented) value = v[i*n+j]; 5798 else value = v[i+j*m]; 5799 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5800 if (in[j] >= cstart && in[j] < cend) { 5801 col = in[j] - cstart; 5802 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5803 } else if (in[j] < 0) continue; 5804 #if defined(PETSC_USE_DEBUG) 5805 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5806 #endif 5807 else { 5808 if (mat->was_assembled) { 5809 if (!aij->colmap) { 5810 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5811 } 5812 #if defined(PETSC_USE_CTABLE) 5813 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5814 col--; 5815 #else 5816 col = aij->colmap[in[j]] - 1; 5817 #endif 5818 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5819 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5820 col = in[j]; 5821 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5822 B = aij->B; 5823 b = (Mat_SeqAIJ*)B->data; 5824 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5825 rp2 = bj + bi[row]; 5826 ap2 = ba + bi[row]; 5827 rmax2 = bimax[row]; 5828 nrow2 = bilen[row]; 5829 low2 = 0; 5830 high2 = nrow2; 5831 bm = aij->B->rmap->n; 5832 ba = b->a; 5833 } 5834 } else col = in[j]; 5835 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5836 } 5837 } 5838 } else if (!aij->donotstash) { 5839 if (roworiented) { 5840 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5841 } else { 5842 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5843 } 5844 } 5845 } 5846 } 5847 PetscFunctionReturnVoid(); 5848 } 5849 5850