#define PETSCMAT_DLL

#include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "src/inline/spops.h"

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscTruth     aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries in each row */
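      /*
         Counting sketch (descriptive note, not part of the original comments):
         olens[i] counts the entries of local row i whose global column lies
         outside [rstart,rend), i.e. the off-diagonal B part, while ld[i] counts
         only the entries to the left of the diagonal block (global column <
         rstart).  ld is stashed in the Mat_MPIAIJ struct so that a later
         MAT_REUSE_MATRIX call can split a contiguous row of values into
         "left B part | A part | right B part" without re-reading the indices.
      */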
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata = (Mat_SeqAIJ*) gmat->data;
      ierr  = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];  ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                    ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
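/*
   Illustrative call sequence (a sketch; mlocal and gmat are placeholder names,
   not part of this file):

     Mat      dist;
     PetscInt mlocal = ...;     local row count chosen by the caller
     ierr = MatDistribute_MPIAIJ(comm,gmat,mlocal,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
     ...
     ierr = MatDistribute_MPIAIJ(comm,gmat,mlocal,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);

   The MAT_REUSE_MATRIX call assumes the nonzero pattern of gmat on process 0 is
   unchanged; only the numerical values are shipped again.
*/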
/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable,
   at a slightly higher hash-table cost; without it, it is not scalable (each
   process holds an order-N integer array) but access is fast.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}


#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
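/*
   How the two insertion macros above work (descriptive sketch): rp1/rp2 and
   ap1/ap2 point at the column indices and values of the current row of the
   diagonal (A) respectively off-diagonal (B) block.  A short bisection narrows
   [low,high], a linear scan then either updates an existing entry or shifts the
   tail of the row up by one slot and inserts the new (col,value) pair;
   MatSeqXAIJReallocateAIJ grows the row storage when the preallocated space
   (rmax) is exhausted and nonew permits new nonzeros.
*/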
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat            A = aij->A;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar      *aa = a->a;
  PetscTruth     ignorezeroentries = a->ignorezeroentries;
  Mat            B = aij->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar      *ba = b->a;

  PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt       nonew = a->nonew;
  MatScalar      *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
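              /*
                 Descriptive note: the requested column is not in the preallocated
                 off-diagonal pattern.  Roughly, DisAssemble_MPIAIJ() reverts the B
                 block to global column numbering so a location outside the current
                 pattern can be inserted; the parallel data structures are rebuilt
                 during the next assembly.
              */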
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */
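  /*
     Descriptive note: values destined for rows owned by other processes were
     stashed by MatSetValues_MPIAIJ() above; the scatter below ships each stash
     entry to its owning process, where MatAssemblyEnd_MPIAIJ() inserts it.
  */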
  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ *)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
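/*
   Communication pattern in MatZeroRows_MPIAIJ below (descriptive sketch): each
   process sorts the requested global rows by owner, the owners learn how many
   messages to expect and the maximum message length via PetscMaxSum(), row
   indices are exchanged with nonblocking sends/receives, and each owner then
   zeroes the rows it actually stores, optionally placing diag on the diagonal.
*/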
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap->range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

     Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  }
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
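/*
   Descriptive note: with the row distribution used here, y = A x is computed
   above as y_local = A_diag * x_local + B_off * x_ghost, overlapping the ghost
   scatter with the local product.  For the transpose product below the roles
   reverse: B_off^T * x_local produces partial sums for rows owned by other
   processes, which are accumulated into yy with a reverse scatter and ADD_VALUES.
*/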
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec)  {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
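  /*
     Descriptive note on the file layout written below: a 4-entry header
     (MAT_FILE_COOKIE, M, N, global nz), then all row lengths, then all global
     column indices, then all values; each section is gathered to process 0 one
     process at a time and written with PetscBinaryWrite().
  */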
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the largest amount any process needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    if (mat->rmap->N > 1024) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead");

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1;

  PetscFunctionBegin;
  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       first,local_size,nrows;
  const PetscInt *rows;
  int            ntids;
  IS             crowp,growp,irowp,lrowp,lcolp,icolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
  }
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
  ierr = ISSetPermutation(growp);CHKERRQ(ierr);
  if (pcomm!=comm) {
    ierr = ISDestroy(crowp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  /* get the local target indices */
  ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISDestroy(irowp);CHKERRQ(ierr);
  /* the column permutation is so much easier;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&ntids);CHKERRQ(ierr);
  if (ntids==1) {
    lcolp = colp;
  } else {
    ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  if (ntids>1) {
    ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISDestroy(lcolp);CHKERRQ(ierr);
  }
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  /* clean up */
  ierr = ISDestroy(lrowp);CHKERRQ(ierr);
  ierr = ISDestroy(icolp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
       allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
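      /*
         Descriptive note: the 1-norm is the maximum column sum.  Each process
         accumulates |a_ij| into a full-length array indexed by global column
         (tmp), the arrays are summed across processes with MPI_Allreduce, and
         the largest entry of the result is taken.  Every process holds the
         entire length-N array, so this is fast but not memory scalable.
      */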
sqrt(*norm); 1534 } else if (type == NORM_1) { /* max column norm */ 1535 PetscReal *tmp,*tmp2; 1536 PetscInt *jj,*garray = aij->garray; 1537 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1538 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1539 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1540 *norm = 0.0; 1541 v = amat->a; jj = amat->j; 1542 for (j=0; j<amat->nz; j++) { 1543 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1544 } 1545 v = bmat->a; jj = bmat->j; 1546 for (j=0; j<bmat->nz; j++) { 1547 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1548 } 1549 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1550 for (j=0; j<mat->cmap->N; j++) { 1551 if (tmp2[j] > *norm) *norm = tmp2[j]; 1552 } 1553 ierr = PetscFree(tmp);CHKERRQ(ierr); 1554 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1555 } else if (type == NORM_INFINITY) { /* max row norm */ 1556 PetscReal ntemp = 0.0; 1557 for (j=0; j<aij->A->rmap->n; j++) { 1558 v = amat->a + amat->i[j]; 1559 sum = 0.0; 1560 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1561 sum += PetscAbsScalar(*v); v++; 1562 } 1563 v = bmat->a + bmat->i[j]; 1564 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1565 sum += PetscAbsScalar(*v); v++; 1566 } 1567 if (sum > ntemp) ntemp = sum; 1568 } 1569 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1570 } else { 1571 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1572 } 1573 } 1574 PetscFunctionReturn(0); 1575 } 1576 1577 #undef __FUNCT__ 1578 #define __FUNCT__ "MatTranspose_MPIAIJ" 1579 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1580 { 1581 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1582 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1583 PetscErrorCode ierr; 1584 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1585 PetscInt cstart=A->cmap->rstart,ncol; 1586 Mat B; 1587 MatScalar *array; 1588 1589 PetscFunctionBegin; 1590 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1591 1592 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1593 ai = Aloc->i; aj = Aloc->j; 1594 bi = Bloc->i; bj = Bloc->j; 1595 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1596 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1597 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1598 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1599 for (i=0; i<ai[ma]; i++){ 1600 d_nnz[aj[i]] ++; 1601 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1602 } 1603 1604 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1605 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1606 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1607 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1608 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1609 } else { 1610 B = *matout; 1611 } 1612 1613 /* copy over the A part */ 1614 array = Aloc->a; 1615 row = A->rmap->rstart; 1616 for (i=0; i<ma; i++) { 1617 ncol = ai[i+1]-ai[i]; 1618 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1619 row++; array += ncol; aj += ncol; 1620 } 1621 aj = Aloc->j; 1622 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1623 1624 /* copy over the B part */ 1625 ierr = 
PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1626 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1627 array = Bloc->a; 1628 row = A->rmap->rstart; 1629 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1630 cols_tmp = cols; 1631 for (i=0; i<mb; i++) { 1632 ncol = bi[i+1]-bi[i]; 1633 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1634 row++; array += ncol; cols_tmp += ncol; 1635 } 1636 ierr = PetscFree(cols);CHKERRQ(ierr); 1637 1638 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1639 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1640 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1641 *matout = B; 1642 } else { 1643 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1644 } 1645 PetscFunctionReturn(0); 1646 } 1647 1648 #undef __FUNCT__ 1649 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1650 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1651 { 1652 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1653 Mat a = aij->A,b = aij->B; 1654 PetscErrorCode ierr; 1655 PetscInt s1,s2,s3; 1656 1657 PetscFunctionBegin; 1658 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1659 if (rr) { 1660 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1661 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1662 /* Overlap communication with computation. */ 1663 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1664 } 1665 if (ll) { 1666 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1667 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1668 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1669 } 1670 /* scale the diagonal block */ 1671 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1672 1673 if (rr) { 1674 /* Do a scatter end and then right scale the off-diagonal block */ 1675 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1676 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1677 } 1678 1679 PetscFunctionReturn(0); 1680 } 1681 1682 #undef __FUNCT__ 1683 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1684 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1685 { 1686 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1687 PetscErrorCode ierr; 1688 1689 PetscFunctionBegin; 1690 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1691 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1692 PetscFunctionReturn(0); 1693 } 1694 #undef __FUNCT__ 1695 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1696 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1697 { 1698 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1699 PetscErrorCode ierr; 1700 1701 PetscFunctionBegin; 1702 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1703 PetscFunctionReturn(0); 1704 } 1705 1706 #undef __FUNCT__ 1707 #define __FUNCT__ "MatEqual_MPIAIJ" 1708 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1709 { 1710 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1711 Mat a,b,c,d; 1712 PetscTruth flg; 1713 PetscErrorCode ierr; 1714 1715 PetscFunctionBegin; 1716 a = matA->A; b = matA->B; 1717 c = matB->A; d = matB->B; 1718 1719 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1720 if (flg) { 1721 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1722 } 1723 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1724 PetscFunctionReturn(0); 1725 } 1726 1727 #undef __FUNCT__ 1728 #define __FUNCT__ "MatCopy_MPIAIJ" 1729 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1730 
{ 1731 PetscErrorCode ierr; 1732 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1733 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1734 1735 PetscFunctionBegin; 1736 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1737 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1738 /* because of the column compression in the off-processor part of the matrix a->B, 1739 the number of columns in a->B and b->B may be different, hence we cannot call 1740 the MatCopy() directly on the two parts. If need be, we can provide a more 1741 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1742 then copying the submatrices */ 1743 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1744 } else { 1745 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1746 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1747 } 1748 PetscFunctionReturn(0); 1749 } 1750 1751 #undef __FUNCT__ 1752 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1753 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1754 { 1755 PetscErrorCode ierr; 1756 1757 PetscFunctionBegin; 1758 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1759 PetscFunctionReturn(0); 1760 } 1761 1762 #include "petscblaslapack.h" 1763 #undef __FUNCT__ 1764 #define __FUNCT__ "MatAXPY_MPIAIJ" 1765 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1766 { 1767 PetscErrorCode ierr; 1768 PetscInt i; 1769 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1770 PetscBLASInt bnz,one=1; 1771 Mat_SeqAIJ *x,*y; 1772 1773 PetscFunctionBegin; 1774 if (str == SAME_NONZERO_PATTERN) { 1775 PetscScalar alpha = a; 1776 x = (Mat_SeqAIJ *)xx->A->data; 1777 y = (Mat_SeqAIJ *)yy->A->data; 1778 bnz = PetscBLASIntCast(x->nz); 1779 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1780 x = (Mat_SeqAIJ *)xx->B->data; 1781 y = (Mat_SeqAIJ *)yy->B->data; 1782 bnz = PetscBLASIntCast(x->nz); 1783 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1784 } else if (str == SUBSET_NONZERO_PATTERN) { 1785 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1786 1787 x = (Mat_SeqAIJ *)xx->B->data; 1788 y = (Mat_SeqAIJ *)yy->B->data; 1789 if (y->xtoy && y->XtoY != xx->B) { 1790 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1791 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1792 } 1793 if (!y->xtoy) { /* get xtoy */ 1794 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1795 y->XtoY = xx->B; 1796 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1797 } 1798 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1799 } else { 1800 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1801 } 1802 PetscFunctionReturn(0); 1803 } 1804 1805 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1806 1807 #undef __FUNCT__ 1808 #define __FUNCT__ "MatConjugate_MPIAIJ" 1809 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1810 { 1811 #if defined(PETSC_USE_COMPLEX) 1812 PetscErrorCode ierr; 1813 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1814 1815 PetscFunctionBegin; 1816 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1817 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1818 #else 1819 PetscFunctionBegin; 1820 #endif 1821 PetscFunctionReturn(0); 1822 } 1823 1824 #undef __FUNCT__ 1825 #define __FUNCT__ "MatRealPart_MPIAIJ" 1826 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1827 { 1828 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1829 PetscErrorCode ierr; 1830 1831 PetscFunctionBegin; 1832 ierr = 
MatRealPart(a->A);CHKERRQ(ierr); 1833 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1834 PetscFunctionReturn(0); 1835 } 1836 1837 #undef __FUNCT__ 1838 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1839 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1840 { 1841 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1842 PetscErrorCode ierr; 1843 1844 PetscFunctionBegin; 1845 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1846 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1847 PetscFunctionReturn(0); 1848 } 1849 1850 #ifdef PETSC_HAVE_PBGL 1851 1852 #include <boost/parallel/mpi/bsp_process_group.hpp> 1853 #include <boost/graph/distributed/ilu_default_graph.hpp> 1854 #include <boost/graph/distributed/ilu_0_block.hpp> 1855 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1856 #include <boost/graph/distributed/petsc/interface.hpp> 1857 #include <boost/multi_array.hpp> 1858 #include <boost/parallel/distributed_property_map.hpp> 1859 1860 #undef __FUNCT__ 1861 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1862 /* 1863 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1864 */ 1865 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1866 { 1867 namespace petsc = boost::distributed::petsc; 1868 1869 namespace graph_dist = boost::graph::distributed; 1870 using boost::graph::distributed::ilu_default::process_group_type; 1871 using boost::graph::ilu_permuted; 1872 1873 PetscTruth row_identity, col_identity; 1874 PetscContainer c; 1875 PetscInt m, n, M, N; 1876 PetscErrorCode ierr; 1877 1878 PetscFunctionBegin; 1879 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1880 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1881 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1882 if (!row_identity || !col_identity) { 1883 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1884 } 1885 1886 process_group_type pg; 1887 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1888 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1889 lgraph_type& level_graph = *lgraph_p; 1890 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1891 1892 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1893 ilu_permuted(level_graph); 1894 1895 /* put together the new matrix */ 1896 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 1897 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1898 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1899 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 1900 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 1901 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1902 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1903 1904 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr); 1905 ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr); 1906 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr); 1907 PetscFunctionReturn(0); 1908 } 1909 1910 #undef __FUNCT__ 1911 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1912 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 1913 { 1914 PetscFunctionBegin; 1915 PetscFunctionReturn(0); 1916 } 1917 1918 #undef __FUNCT__ 1919 #define __FUNCT__ "MatSolve_MPIAIJ" 1920 /* 1921 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1922 */ 1923 PetscErrorCode
MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1924 { 1925 namespace graph_dist = boost::graph::distributed; 1926 1927 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1928 lgraph_type* lgraph_p; 1929 PetscContainer c; 1930 PetscErrorCode ierr; 1931 1932 PetscFunctionBegin; 1933 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1934 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1935 ierr = VecCopy(b, x); CHKERRQ(ierr); 1936 1937 PetscScalar* array_x; 1938 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1939 PetscInt sx; 1940 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 1941 1942 PetscScalar* array_b; 1943 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 1944 PetscInt sb; 1945 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 1946 1947 lgraph_type& level_graph = *lgraph_p; 1948 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1949 1950 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 1951 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 1952 ref_x(array_x, boost::extents[num_vertices(graph)]); 1953 1954 typedef boost::iterator_property_map<array_ref_type::iterator, 1955 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 1956 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 1957 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 1958 1959 ilu_set_solve(*lgraph_p, vector_b, vector_x); 1960 1961 PetscFunctionReturn(0); 1962 } 1963 #endif 1964 1965 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 1966 PetscInt nzlocal,nsends,nrecvs; 1967 PetscMPIInt *send_rank; 1968 PetscInt *sbuf_nz,*sbuf_j,**rbuf_j; 1969 PetscScalar *sbuf_a,**rbuf_a; 1970 PetscErrorCode (*MatDestroy)(Mat); 1971 } Mat_Redundant; 1972 1973 #undef __FUNCT__ 1974 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 1975 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 1976 { 1977 PetscErrorCode ierr; 1978 Mat_Redundant *redund=(Mat_Redundant*)ptr; 1979 PetscInt i; 1980 1981 PetscFunctionBegin; 1982 ierr = PetscFree(redund->send_rank);CHKERRQ(ierr); 1983 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 1984 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 1985 for (i=0; i<redund->nrecvs; i++){ 1986 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 1987 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 1988 } 1989 ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 1990 ierr = PetscFree(redund);CHKERRQ(ierr); 1991 PetscFunctionReturn(0); 1992 } 1993 1994 #undef __FUNCT__ 1995 #define __FUNCT__ "MatDestroy_MatRedundant" 1996 PetscErrorCode MatDestroy_MatRedundant(Mat A) 1997 { 1998 PetscErrorCode ierr; 1999 PetscContainer container; 2000 Mat_Redundant *redund=PETSC_NULL; 2001 2002 PetscFunctionBegin; 2003 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2004 if (container) { 2005 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2006 } else { 2007 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2008 } 2009 A->ops->destroy = redund->MatDestroy; 2010 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2011 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2012 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 2013 PetscFunctionReturn(0); 2014 } 2015 2016 #undef __FUNCT__ 2017 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2018 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt 
nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2019 { 2020 PetscMPIInt rank,size; 2021 MPI_Comm comm=((PetscObject)mat)->comm; 2022 PetscErrorCode ierr; 2023 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2024 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2025 PetscInt *rowrange=mat->rmap->range; 2026 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2027 Mat A=aij->A,B=aij->B,C=*matredundant; 2028 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2029 PetscScalar *sbuf_a; 2030 PetscInt nzlocal=a->nz+b->nz; 2031 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2032 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2033 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2034 MatScalar *aworkA,*aworkB; 2035 PetscScalar *vals; 2036 PetscMPIInt tag1,tag2,tag3,imdex; 2037 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2038 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2039 MPI_Status recv_status,*send_status; 2040 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2041 PetscInt **rbuf_j=PETSC_NULL; 2042 PetscScalar **rbuf_a=PETSC_NULL; 2043 Mat_Redundant *redund=PETSC_NULL; 2044 PetscContainer container; 2045 2046 PetscFunctionBegin; 2047 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2048 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2049 2050 if (reuse == MAT_REUSE_MATRIX) { 2051 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2052 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2053 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2054 if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2055 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2056 if (container) { 2057 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2058 } else { 2059 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2060 } 2061 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2062 2063 nsends = redund->nsends; 2064 nrecvs = redund->nrecvs; 2065 send_rank = redund->send_rank; recv_rank = send_rank + size; 2066 sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends; 2067 sbuf_j = redund->sbuf_j; 2068 sbuf_a = redund->sbuf_a; 2069 rbuf_j = redund->rbuf_j; 2070 rbuf_a = redund->rbuf_a; 2071 } 2072 2073 if (reuse == MAT_INITIAL_MATRIX){ 2074 PetscMPIInt subrank,subsize; 2075 PetscInt nleftover,np_subcomm; 2076 /* get the destination processors' id send_rank, nsends and nrecvs */ 2077 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2078 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2079 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 2080 recv_rank = send_rank + size; 2081 np_subcomm = size/nsubcomm; 2082 nleftover = size - nsubcomm*np_subcomm; 2083 nsends = 0; nrecvs = 0; 2084 for (i=0; i<size; i++){ /* i=rank*/ 2085 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2086 send_rank[nsends] = i; nsends++; 2087 recv_rank[nrecvs++] = i; 2088 } 2089 } 2090 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2091 i = size-nleftover-1; 2092 j = 0; 2093 while (j < nsubcomm - nleftover){ 2094 send_rank[nsends++] = i; 2095 i--; j++; 2096 } 2097 } 2098 2099 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2100 for (i=0; i<nleftover; i++){ 2101 recv_rank[nrecvs++] = size-nleftover+i; 2102 } 2103 } 2104 2105 /* allocate sbuf_j, sbuf_a */ 2106 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2107 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2108 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2109 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2110 2111 /* copy mat's local entries into the buffers */ 2112 if (reuse == MAT_INITIAL_MATRIX){ 2113 rownz_max = 0; 2114 rptr = sbuf_j; 2115 cols = sbuf_j + rend-rstart + 1; 2116 vals = sbuf_a; 2117 rptr[0] = 0; 2118 for (i=0; i<rend-rstart; i++){ 2119 row = i + rstart; 2120 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2121 ncols = nzA + nzB; 2122 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2123 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2124 /* load the column indices for this row into cols */ 2125 lwrite = 0; 2126 for (l=0; l<nzB; l++) { 2127 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2128 vals[lwrite] = aworkB[l]; 2129 cols[lwrite++] = ctmp; 2130 } 2131 } 2132 for (l=0; l<nzA; l++){ 2133 vals[lwrite] = aworkA[l]; 2134 cols[lwrite++] = cstart + cworkA[l]; 2135 } 2136 for (l=0; l<nzB; l++) { 2137 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2138 vals[lwrite] = aworkB[l]; 2139 cols[lwrite++] = ctmp; 2140 } 2141 } 2142 vals += ncols; 2143 cols += ncols; 2144 rptr[i+1] = rptr[i] + ncols; 2145 if (rownz_max < ncols) rownz_max = ncols; 2146 } 2147 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2148 } else { /* only copy matrix values into sbuf_a */ 2149 rptr = sbuf_j; 2150 vals = sbuf_a; 2151 rptr[0] = 0; 2152 for (i=0; i<rend-rstart; i++){ 2153 row = i + rstart; 2154 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2155 ncols = nzA + nzB; 2156 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2157 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2158 lwrite = 0; 2159 for (l=0; l<nzB; l++) { 2160 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2161 } 2162 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2163 for (l=0; l<nzB; l++) { 
2164 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2165 } 2166 vals += ncols; 2167 rptr[i+1] = rptr[i] + ncols; 2168 } 2169 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2170 2171 /* send nzlocal to others, and recv other's nzlocal */ 2172 /*--------------------------------------------------*/ 2173 if (reuse == MAT_INITIAL_MATRIX){ 2174 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2175 s_waits2 = s_waits3 + nsends; 2176 s_waits1 = s_waits2 + nsends; 2177 r_waits1 = s_waits1 + nsends; 2178 r_waits2 = r_waits1 + nrecvs; 2179 r_waits3 = r_waits2 + nrecvs; 2180 } else { 2181 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2182 r_waits3 = s_waits3 + nsends; 2183 } 2184 2185 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2186 if (reuse == MAT_INITIAL_MATRIX){ 2187 /* get new tags to keep the communication clean */ 2188 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2189 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2190 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2191 rbuf_nz = sbuf_nz + nsends; 2192 2193 /* post receives of other's nzlocal */ 2194 for (i=0; i<nrecvs; i++){ 2195 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2196 } 2197 /* send nzlocal to others */ 2198 for (i=0; i<nsends; i++){ 2199 sbuf_nz[i] = nzlocal; 2200 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2201 } 2202 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2203 count = nrecvs; 2204 while (count) { 2205 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2206 recv_rank[imdex] = recv_status.MPI_SOURCE; 2207 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2208 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2209 2210 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2211 rbuf_nz[imdex] += i + 2; 2212 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2213 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2214 count--; 2215 } 2216 /* wait on sends of nzlocal */ 2217 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2218 /* send mat->i,j to others, and recv from other's */ 2219 /*------------------------------------------------*/ 2220 for (i=0; i<nsends; i++){ 2221 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2222 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2223 } 2224 /* wait on receives of mat->i,j */ 2225 /*------------------------------*/ 2226 count = nrecvs; 2227 while (count) { 2228 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2229 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2230 count--; 2231 } 2232 /* wait on sends of mat->i,j */ 2233 /*---------------------------*/ 2234 if (nsends) { 2235 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2236 } 2237 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2238 2239 /* post receives, send and receive mat->a */ 2240 /*----------------------------------------*/ 
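/* all receives are posted before the sends; MPI_Waitany() below then handles the value messages in whatever order they arrive */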
2241 for (imdex=0; imdex<nrecvs; imdex++) { 2242 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2243 } 2244 for (i=0; i<nsends; i++){ 2245 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2246 } 2247 count = nrecvs; 2248 while (count) { 2249 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2250 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2251 count--; 2252 } 2253 if (nsends) { 2254 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2255 } 2256 2257 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2258 2259 /* create redundant matrix */ 2260 /*-------------------------*/ 2261 if (reuse == MAT_INITIAL_MATRIX){ 2262 /* compute rownz_max for preallocation */ 2263 for (imdex=0; imdex<nrecvs; imdex++){ 2264 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2265 rptr = rbuf_j[imdex]; 2266 for (i=0; i<j; i++){ 2267 ncols = rptr[i+1] - rptr[i]; 2268 if (rownz_max < ncols) rownz_max = ncols; 2269 } 2270 } 2271 2272 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2273 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2274 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2275 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2276 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2277 } else { 2278 C = *matredundant; 2279 } 2280 2281 /* insert local matrix entries */ 2282 rptr = sbuf_j; 2283 cols = sbuf_j + rend-rstart + 1; 2284 vals = sbuf_a; 2285 for (i=0; i<rend-rstart; i++){ 2286 row = i + rstart; 2287 ncols = rptr[i+1] - rptr[i]; 2288 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2289 vals += ncols; 2290 cols += ncols; 2291 } 2292 /* insert received matrix entries */ 2293 for (imdex=0; imdex<nrecvs; imdex++){ 2294 rstart = rowrange[recv_rank[imdex]]; 2295 rend = rowrange[recv_rank[imdex]+1]; 2296 rptr = rbuf_j[imdex]; 2297 cols = rbuf_j[imdex] + rend-rstart + 1; 2298 vals = rbuf_a[imdex]; 2299 for (i=0; i<rend-rstart; i++){ 2300 row = i + rstart; 2301 ncols = rptr[i+1] - rptr[i]; 2302 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2303 vals += ncols; 2304 cols += ncols; 2305 } 2306 } 2307 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2308 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2309 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2310 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2311 if (reuse == MAT_INITIAL_MATRIX){ 2312 PetscContainer container; 2313 *matredundant = C; 2314 /* create a supporting struct and attach it to C for reuse */ 2315 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2316 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2317 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2318 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2319 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2320 2321 redund->nzlocal = nzlocal; 2322 redund->nsends = nsends; 2323 redund->nrecvs = nrecvs; 2324 redund->send_rank = send_rank; 2325 redund->sbuf_nz = sbuf_nz; 2326 redund->sbuf_j = sbuf_j; 2327 redund->sbuf_a = sbuf_a; 2328 redund->rbuf_j = 
rbuf_j; 2329 redund->rbuf_a = rbuf_a; 2330 2331 redund->MatDestroy = C->ops->destroy; 2332 C->ops->destroy = MatDestroy_MatRedundant; 2333 } 2334 PetscFunctionReturn(0); 2335 } 2336 2337 #undef __FUNCT__ 2338 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2339 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2340 { 2341 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2342 PetscErrorCode ierr; 2343 PetscInt i,*idxb = 0; 2344 PetscScalar *va,*vb; 2345 Vec vtmp; 2346 2347 PetscFunctionBegin; 2348 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2349 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2350 if (idx) { 2351 for (i=0; i<A->cmap->n; i++) { 2352 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2353 } 2354 } 2355 2356 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2357 if (idx) { 2358 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2359 } 2360 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2361 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2362 2363 for (i=0; i<A->rmap->n; i++){ 2364 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2365 va[i] = vb[i]; 2366 if (idx) idx[i] = a->garray[idxb[i]]; 2367 } 2368 } 2369 2370 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2371 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2372 if (idxb) { 2373 ierr = PetscFree(idxb);CHKERRQ(ierr); 2374 } 2375 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2376 PetscFunctionReturn(0); 2377 } 2378 2379 #undef __FUNCT__ 2380 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2381 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2382 { 2383 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2384 PetscErrorCode ierr; 2385 PetscInt i,*idxb = 0; 2386 PetscScalar *va,*vb; 2387 Vec vtmp; 2388 2389 PetscFunctionBegin; 2390 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2391 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2392 if (idx) { 2393 for (i=0; i<A->cmap->n; i++) { 2394 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2395 } 2396 } 2397 2398 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2399 if (idx) { 2400 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2401 } 2402 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2403 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2404 2405 for (i=0; i<A->rmap->n; i++){ 2406 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2407 va[i] = vb[i]; 2408 if (idx) idx[i] = a->garray[idxb[i]]; 2409 } 2410 } 2411 2412 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2413 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2414 if (idxb) { 2415 ierr = PetscFree(idxb);CHKERRQ(ierr); 2416 } 2417 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2418 PetscFunctionReturn(0); 2419 } 2420 2421 #undef __FUNCT__ 2422 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2423 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2424 { 2425 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2426 PetscInt n = A->rmap->n; 2427 PetscInt cstart = A->cmap->rstart; 2428 PetscInt *cmap = mat->garray; 2429 PetscInt *diagIdx, *offdiagIdx; 2430 Vec diagV, offdiagV; 2431 PetscScalar *a, *diagA, *offdiagA; 2432 PetscInt r; 2433 PetscErrorCode ierr; 2434 2435 PetscFunctionBegin; 2436 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2437 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2438 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2439 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2440 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2441 
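/* merge the two per-row candidates: for each local row keep the value of smaller magnitude and convert the winning local column index to a global index (cstart offset for the diagonal block, garray for the off-diagonal block) */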
ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2442 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2443 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2444 for(r = 0; r < n; ++r) { 2445 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2446 a[r] = diagA[r]; 2447 idx[r] = cstart + diagIdx[r]; 2448 } else { 2449 a[r] = offdiagA[r]; 2450 idx[r] = cmap[offdiagIdx[r]]; 2451 } 2452 } 2453 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2454 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2455 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2456 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2457 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2458 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2459 PetscFunctionReturn(0); 2460 } 2461 2462 #undef __FUNCT__ 2463 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2464 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2465 { 2466 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2467 PetscInt n = A->rmap->n; 2468 PetscInt cstart = A->cmap->rstart; 2469 PetscInt *cmap = mat->garray; 2470 PetscInt *diagIdx, *offdiagIdx; 2471 Vec diagV, offdiagV; 2472 PetscScalar *a, *diagA, *offdiagA; 2473 PetscInt r; 2474 PetscErrorCode ierr; 2475 2476 PetscFunctionBegin; 2477 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2478 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2479 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2480 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2481 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2482 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2483 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2484 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2485 for(r = 0; r < n; ++r) { 2486 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2487 a[r] = diagA[r]; 2488 idx[r] = cstart + diagIdx[r]; 2489 } else { 2490 a[r] = offdiagA[r]; 2491 idx[r] = cmap[offdiagIdx[r]]; 2492 } 2493 } 2494 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2495 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2496 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2497 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2498 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2499 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2500 PetscFunctionReturn(0); 2501 } 2502 2503 #undef __FUNCT__ 2504 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2505 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[]) 2506 { 2507 PetscErrorCode ierr; 2508 2509 PetscFunctionBegin; 2510 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 2511 PetscFunctionReturn(0); 2512 } 2513 2514 /* -------------------------------------------------------------------*/ 2515 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2516 MatGetRow_MPIAIJ, 2517 MatRestoreRow_MPIAIJ, 2518 MatMult_MPIAIJ, 2519 /* 4*/ MatMultAdd_MPIAIJ, 2520 MatMultTranspose_MPIAIJ, 2521 MatMultTransposeAdd_MPIAIJ, 2522 #ifdef PETSC_HAVE_PBGL 2523 MatSolve_MPIAIJ, 2524 #else 2525 0, 2526 #endif 2527 0, 2528 0, 2529 /*10*/ 0, 2530 0, 2531 0, 2532 MatRelax_MPIAIJ, 2533 MatTranspose_MPIAIJ, 2534 /*15*/ MatGetInfo_MPIAIJ, 2535 MatEqual_MPIAIJ, 2536 MatGetDiagonal_MPIAIJ, 2537 MatDiagonalScale_MPIAIJ, 2538 MatNorm_MPIAIJ, 2539 /*20*/ MatAssemblyBegin_MPIAIJ, 2540 MatAssemblyEnd_MPIAIJ, 2541 0, 2542 MatSetOption_MPIAIJ, 2543 MatZeroEntries_MPIAIJ, 2544 /*25*/ MatZeroRows_MPIAIJ, 2545 0, 2546 #ifdef PETSC_HAVE_PBGL 
2547 0, 2548 #else 2549 0, 2550 #endif 2551 0, 2552 0, 2553 /*30*/ MatSetUpPreallocation_MPIAIJ, 2554 #ifdef PETSC_HAVE_PBGL 2555 0, 2556 #else 2557 0, 2558 #endif 2559 0, 2560 0, 2561 0, 2562 /*35*/ MatDuplicate_MPIAIJ, 2563 0, 2564 0, 2565 0, 2566 0, 2567 /*40*/ MatAXPY_MPIAIJ, 2568 MatGetSubMatrices_MPIAIJ, 2569 MatIncreaseOverlap_MPIAIJ, 2570 MatGetValues_MPIAIJ, 2571 MatCopy_MPIAIJ, 2572 /*45*/ MatGetRowMax_MPIAIJ, 2573 MatScale_MPIAIJ, 2574 0, 2575 0, 2576 0, 2577 /*50*/ MatSetBlockSize_MPIAIJ, 2578 0, 2579 0, 2580 0, 2581 0, 2582 /*55*/ MatFDColoringCreate_MPIAIJ, 2583 0, 2584 MatSetUnfactored_MPIAIJ, 2585 MatPermute_MPIAIJ, 2586 0, 2587 /*60*/ MatGetSubMatrix_MPIAIJ, 2588 MatDestroy_MPIAIJ, 2589 MatView_MPIAIJ, 2590 0, 2591 0, 2592 /*65*/ 0, 2593 0, 2594 0, 2595 0, 2596 0, 2597 /*70*/ MatGetRowMaxAbs_MPIAIJ, 2598 MatGetRowMinAbs_MPIAIJ, 2599 0, 2600 MatSetColoring_MPIAIJ, 2601 #if defined(PETSC_HAVE_ADIC) 2602 MatSetValuesAdic_MPIAIJ, 2603 #else 2604 0, 2605 #endif 2606 MatSetValuesAdifor_MPIAIJ, 2607 /*75*/ 0, 2608 0, 2609 0, 2610 0, 2611 0, 2612 /*80*/ 0, 2613 0, 2614 0, 2615 /*84*/ MatLoad_MPIAIJ, 2616 0, 2617 0, 2618 0, 2619 0, 2620 0, 2621 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 2622 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2623 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2624 MatPtAP_Basic, 2625 MatPtAPSymbolic_MPIAIJ, 2626 /*95*/ MatPtAPNumeric_MPIAIJ, 2627 0, 2628 0, 2629 0, 2630 0, 2631 /*100*/0, 2632 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2633 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2634 MatConjugate_MPIAIJ, 2635 0, 2636 /*105*/MatSetValuesRow_MPIAIJ, 2637 MatRealPart_MPIAIJ, 2638 MatImaginaryPart_MPIAIJ, 2639 0, 2640 0, 2641 /*110*/0, 2642 MatGetRedundantMatrix_MPIAIJ, 2643 MatGetRowMin_MPIAIJ, 2644 0, 2645 0, 2646 /*115*/MatGetSeqNonzerostructure_MPIAIJ}; 2647 2648 /* ----------------------------------------------------------------------------------------*/ 2649 2650 EXTERN_C_BEGIN 2651 #undef __FUNCT__ 2652 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2653 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2654 { 2655 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2656 PetscErrorCode ierr; 2657 2658 PetscFunctionBegin; 2659 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2660 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2661 PetscFunctionReturn(0); 2662 } 2663 EXTERN_C_END 2664 2665 EXTERN_C_BEGIN 2666 #undef __FUNCT__ 2667 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2668 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2669 { 2670 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2671 PetscErrorCode ierr; 2672 2673 PetscFunctionBegin; 2674 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2675 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2676 PetscFunctionReturn(0); 2677 } 2678 EXTERN_C_END 2679 2680 #include "petscpc.h" 2681 EXTERN_C_BEGIN 2682 #undef __FUNCT__ 2683 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2684 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2685 { 2686 Mat_MPIAIJ *b; 2687 PetscErrorCode ierr; 2688 PetscInt i; 2689 2690 PetscFunctionBegin; 2691 B->preallocated = PETSC_TRUE; 2692 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2693 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2694 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2695 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2696 2697 B->rmap->bs = B->cmap->bs = 1; 2698 ierr = 
PetscMapSetUp(B->rmap);CHKERRQ(ierr); 2699 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 2700 if (d_nnz) { 2701 for (i=0; i<B->rmap->n; i++) { 2702 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2703 } 2704 } 2705 if (o_nnz) { 2706 for (i=0; i<B->rmap->n; i++) { 2707 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2708 } 2709 } 2710 b = (Mat_MPIAIJ*)B->data; 2711 2712 /* Explicitly create 2 MATSEQAIJ matrices. */ 2713 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2714 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2715 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2716 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2717 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2718 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2719 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2720 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2721 2722 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2723 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2724 2725 PetscFunctionReturn(0); 2726 } 2727 EXTERN_C_END 2728 2729 #undef __FUNCT__ 2730 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2731 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2732 { 2733 Mat mat; 2734 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2735 PetscErrorCode ierr; 2736 2737 PetscFunctionBegin; 2738 *newmat = 0; 2739 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2740 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2741 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2742 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2743 a = (Mat_MPIAIJ*)mat->data; 2744 2745 mat->factor = matin->factor; 2746 mat->rmap->bs = matin->rmap->bs; 2747 mat->assembled = PETSC_TRUE; 2748 mat->insertmode = NOT_SET_VALUES; 2749 mat->preallocated = PETSC_TRUE; 2750 2751 a->size = oldmat->size; 2752 a->rank = oldmat->rank; 2753 a->donotstash = oldmat->donotstash; 2754 a->roworiented = oldmat->roworiented; 2755 a->rowindices = 0; 2756 a->rowvalues = 0; 2757 a->getrowactive = PETSC_FALSE; 2758 2759 ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr); 2760 ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr); 2761 2762 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr); 2763 if (oldmat->colmap) { 2764 #if defined (PETSC_USE_CTABLE) 2765 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2766 #else 2767 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2768 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2769 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2770 #endif 2771 } else a->colmap = 0; 2772 if (oldmat->garray) { 2773 PetscInt len; 2774 len = oldmat->B->cmap->n; 2775 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2776 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2777 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2778 } else a->garray = 0; 2779 2780 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2781 ierr = 
PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2782 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2783 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2784 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2785 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2786 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2787 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2788 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2789 *newmat = mat; 2790 PetscFunctionReturn(0); 2791 } 2792 2793 #include "petscsys.h" 2794 2795 #undef __FUNCT__ 2796 #define __FUNCT__ "MatLoad_MPIAIJ" 2797 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 2798 { 2799 Mat A; 2800 PetscScalar *vals,*svals; 2801 MPI_Comm comm = ((PetscObject)viewer)->comm; 2802 MPI_Status status; 2803 PetscErrorCode ierr; 2804 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 2805 PetscInt i,nz,j,rstart,rend,mmax; 2806 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2807 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2808 PetscInt cend,cstart,n,*rowners; 2809 int fd; 2810 2811 PetscFunctionBegin; 2812 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2813 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2814 if (!rank) { 2815 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2816 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2817 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2818 } 2819 2820 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2821 M = header[1]; N = header[2]; 2822 /* determine ownership of all rows */ 2823 m = M/size + ((M % size) > rank); 2824 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2825 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2826 2827 /* First process needs enough room for process with most rows */ 2828 if (!rank) { 2829 mmax = rowners[1]; 2830 for (i=2; i<size; i++) { 2831 mmax = PetscMax(mmax,rowners[i]); 2832 } 2833 } else mmax = m; 2834 2835 rowners[0] = 0; 2836 for (i=2; i<=size; i++) { 2837 rowners[i] += rowners[i-1]; 2838 } 2839 rstart = rowners[rank]; 2840 rend = rowners[rank+1]; 2841 2842 /* distribute row lengths to all processors */ 2843 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2844 if (!rank) { 2845 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2846 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2847 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2848 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2849 for (j=0; j<m; j++) { 2850 procsnz[0] += ourlens[j]; 2851 } 2852 for (i=1; i<size; i++) { 2853 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2854 /* calculate the number of nonzeros on each processor */ 2855 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2856 procsnz[i] += rowlengths[j]; 2857 } 2858 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2859 } 2860 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2861 } else { 2862 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2863 } 2864 2865 if (!rank) { 2866 /* determine max buffer needed and allocate it */ 2867 maxnz = 0; 2868 for (i=0; i<size; i++) { 2869 maxnz = PetscMax(maxnz,procsnz[i]); 2870 } 2871 ierr = 
PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2872 2873 /* read in my part of the matrix column indices */ 2874 nz = procsnz[0]; 2875 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2876 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2877 2878 /* read in every one elses and ship off */ 2879 for (i=1; i<size; i++) { 2880 nz = procsnz[i]; 2881 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2882 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2883 } 2884 ierr = PetscFree(cols);CHKERRQ(ierr); 2885 } else { 2886 /* determine buffer space needed for message */ 2887 nz = 0; 2888 for (i=0; i<m; i++) { 2889 nz += ourlens[i]; 2890 } 2891 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2892 2893 /* receive message of column indices*/ 2894 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2895 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2896 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2897 } 2898 2899 /* determine column ownership if matrix is not square */ 2900 if (N != M) { 2901 n = N/size + ((N % size) > rank); 2902 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2903 cstart = cend - n; 2904 } else { 2905 cstart = rstart; 2906 cend = rend; 2907 n = cend - cstart; 2908 } 2909 2910 /* loop over local rows, determining number of off diagonal entries */ 2911 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2912 jj = 0; 2913 for (i=0; i<m; i++) { 2914 for (j=0; j<ourlens[i]; j++) { 2915 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2916 jj++; 2917 } 2918 } 2919 2920 /* create our matrix */ 2921 for (i=0; i<m; i++) { 2922 ourlens[i] -= offlens[i]; 2923 } 2924 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2925 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2926 ierr = MatSetType(A,type);CHKERRQ(ierr); 2927 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2928 2929 for (i=0; i<m; i++) { 2930 ourlens[i] += offlens[i]; 2931 } 2932 2933 if (!rank) { 2934 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2935 2936 /* read in my part of the matrix numerical values */ 2937 nz = procsnz[0]; 2938 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2939 2940 /* insert into matrix */ 2941 jj = rstart; 2942 smycols = mycols; 2943 svals = vals; 2944 for (i=0; i<m; i++) { 2945 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2946 smycols += ourlens[i]; 2947 svals += ourlens[i]; 2948 jj++; 2949 } 2950 2951 /* read in other processors and ship out */ 2952 for (i=1; i<size; i++) { 2953 nz = procsnz[i]; 2954 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2955 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 2956 } 2957 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2958 } else { 2959 /* receive numeric values */ 2960 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2961 2962 /* receive message of values*/ 2963 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 2964 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2965 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2966 2967 /* insert into matrix */ 2968 jj = rstart; 2969 smycols = mycols; 2970 svals = vals; 2971 for (i=0; i<m; i++) { 2972 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2973 
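/* advance past this row's ourlens[i] column indices and values before inserting the next local row */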
smycols += ourlens[i]; 2974 svals += ourlens[i]; 2975 jj++; 2976 } 2977 } 2978 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2979 ierr = PetscFree(vals);CHKERRQ(ierr); 2980 ierr = PetscFree(mycols);CHKERRQ(ierr); 2981 ierr = PetscFree(rowners);CHKERRQ(ierr); 2982 2983 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2984 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2985 *newmat = A; 2986 PetscFunctionReturn(0); 2987 } 2988 2989 #undef __FUNCT__ 2990 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2991 /* 2992 Not great since it makes two copies of the submatrix, first an SeqAIJ 2993 in local and then by concatenating the local matrices the end result. 2994 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2995 */ 2996 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2997 { 2998 PetscErrorCode ierr; 2999 PetscMPIInt rank,size; 3000 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3001 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3002 Mat *local,M,Mreuse; 3003 MatScalar *vwork,*aa; 3004 MPI_Comm comm = ((PetscObject)mat)->comm; 3005 Mat_SeqAIJ *aij; 3006 3007 3008 PetscFunctionBegin; 3009 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3010 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3011 3012 if (call == MAT_REUSE_MATRIX) { 3013 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3014 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3015 local = &Mreuse; 3016 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3017 } else { 3018 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3019 Mreuse = *local; 3020 ierr = PetscFree(local);CHKERRQ(ierr); 3021 } 3022 3023 /* 3024 m - number of local rows 3025 n - number of columns (same on all processors) 3026 rstart - first row in new global matrix generated 3027 */ 3028 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3029 if (call == MAT_INITIAL_MATRIX) { 3030 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3031 ii = aij->i; 3032 jj = aij->j; 3033 3034 /* 3035 Determine the number of non-zeros in the diagonal and off-diagonal 3036 portions of the matrix in order to do correct preallocation 3037 */ 3038 3039 /* first get start and end of "diagonal" columns */ 3040 if (csize == PETSC_DECIDE) { 3041 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3042 if (mglobal == n) { /* square matrix */ 3043 nlocal = m; 3044 } else { 3045 nlocal = n/size + ((n % size) > rank); 3046 } 3047 } else { 3048 nlocal = csize; 3049 } 3050 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3051 rstart = rend - nlocal; 3052 if (rank == size - 1 && rend != n) { 3053 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3054 } 3055 3056 /* next, compute all the lengths */ 3057 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3058 olens = dlens + m; 3059 for (i=0; i<m; i++) { 3060 jend = ii[i+1] - ii[i]; 3061 olen = 0; 3062 dlen = 0; 3063 for (j=0; j<jend; j++) { 3064 if (*jj < rstart || *jj >= rend) olen++; 3065 else dlen++; 3066 jj++; 3067 } 3068 olens[i] = olen; 3069 dlens[i] = dlen; 3070 } 3071 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3072 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3073 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3074 ierr = 
MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3075 ierr = PetscFree(dlens);CHKERRQ(ierr); 3076 } else { 3077 PetscInt ml,nl; 3078 3079 M = *newmat; 3080 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3081 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3082 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3083 /* 3084 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3085 rather than the slower MatSetValues(). 3086 */ 3087 M->was_assembled = PETSC_TRUE; 3088 M->assembled = PETSC_FALSE; 3089 } 3090 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3091 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3092 ii = aij->i; 3093 jj = aij->j; 3094 aa = aij->a; 3095 for (i=0; i<m; i++) { 3096 row = rstart + i; 3097 nz = ii[i+1] - ii[i]; 3098 cwork = jj; jj += nz; 3099 vwork = aa; aa += nz; 3100 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3101 } 3102 3103 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3104 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3105 *newmat = M; 3106 3107 /* save submatrix used in processor for next request */ 3108 if (call == MAT_INITIAL_MATRIX) { 3109 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3110 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 3111 } 3112 3113 PetscFunctionReturn(0); 3114 } 3115 3116 EXTERN_C_BEGIN 3117 #undef __FUNCT__ 3118 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3119 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3120 { 3121 PetscInt m,cstart, cend,j,nnz,i,d; 3122 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3123 const PetscInt *JJ; 3124 PetscScalar *values; 3125 PetscErrorCode ierr; 3126 3127 PetscFunctionBegin; 3128 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3129 3130 B->rmap->bs = B->cmap->bs = 1; 3131 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 3132 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 3133 m = B->rmap->n; 3134 cstart = B->cmap->rstart; 3135 cend = B->cmap->rend; 3136 rstart = B->rmap->rstart; 3137 3138 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 3139 o_nnz = d_nnz + m; 3140 3141 #if defined(PETSC_USE_DEBUG) 3142 for (i=0; i<m; i++) { 3143 nnz = Ii[i+1]- Ii[i]; 3144 JJ = J + Ii[i]; 3145 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3146 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3147 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3148 for (j=1; j<nnz; j++) { 3149 if (JJ[j] <= JJ[j-1]) SETERRQ2(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at %D location in column indices",i,j); 3150 } 3151 } 3152 #endif 3153 3154 for (i=0; i<m; i++) { 3155 nnz = Ii[i+1]- Ii[i]; 3156 JJ = J + Ii[i]; 3157 nnz_max = PetscMax(nnz_max,nnz); 3158 for (j=0; j<nnz; j++) { 3159 if (*JJ >= cstart) break; 3160 JJ++; 3161 } 3162 d = 0; 3163 for (; j<nnz; j++) { 3164 if (*JJ++ >= cend) break; 3165 d++; 3166 } 3167 d_nnz[i] = d; 3168 o_nnz[i] = nnz - d; 3169 } 3170 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3171 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 3172 3173 if (v) values = (PetscScalar*)v; 3174 else { 3175 ierr =
PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = Ii[i+1] - Ii[i];
    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (!v) {
    ierr = PetscFree(values);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
/*@
   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero); these must be sorted for each row
-  v - optional values in the matrix

   Level: developer

   Notes:
   The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of v[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and the i indices are indices into the local j array.

   The format used for the sparse matrix input is equivalent to a row-major ordering, i.e.,
   for the following matrix the expected input data is as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i = {0,1,3}  [size = nrow+1 = 2+1]
        j = {0,0,2}  [size = nz = 3]
        v = {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i = {0,3}    [size = nrow+1 = 1+1]
        j = {0,1,2}  [size = nz = 3]
        v = {4,5,6}  [size = nz = 3]

   The column indices for each row MUST be sorted.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
          MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[],const PetscScalar v[])
{
  PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  d_nz - number of nonzeros per row in the DIAGONAL portion of the local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of the local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)) is fully compatible with standard Fortran 77
   storage. The stored row and column indices begin with zero. See the users manual for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatGetInfo()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithArrays"
/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the
   local rows in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - this value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
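   As an illustration only, suppose each process has already assembled the CSR triplet for
   its own rows; a minimal creation call is sketched below. The names nlocal, N, i, j, and a
   are hypothetical placeholders, and the two-process example further down spells out what
   such arrays contain.

.vb
   Mat A;
   ierr = MatCreateMPIAIJWithArrays(comm,nlocal,PETSC_DECIDE,PETSC_DETERMINE,N,i,j,a,&A);CHKERRQ(ierr);
   /* the arrays are copied, so i, j, and a may be freed or reused right after this call */
.ve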
3424 3425 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3426 3427 The format which is used for the sparse matrix input, is equivalent to a 3428 row-major ordering.. i.e for the following matrix, the input data expected is 3429 as shown: 3430 3431 1 0 0 3432 2 0 3 P0 3433 ------- 3434 4 5 6 P1 3435 3436 Process0 [P0]: rows_owned=[0,1] 3437 i = {0,1,3} [size = nrow+1 = 2+1] 3438 j = {0,0,2} [size = nz = 6] 3439 v = {1,2,3} [size = nz = 6] 3440 3441 Process1 [P1]: rows_owned=[2] 3442 i = {0,3} [size = nrow+1 = 1+1] 3443 j = {0,1,2} [size = nz = 6] 3444 v = {4,5,6} [size = nz = 6] 3445 3446 .keywords: matrix, aij, compressed row, sparse, parallel 3447 3448 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3449 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3450 @*/ 3451 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3452 { 3453 PetscErrorCode ierr; 3454 3455 PetscFunctionBegin; 3456 if (i[0]) { 3457 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3458 } 3459 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3460 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3461 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3462 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3463 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3464 PetscFunctionReturn(0); 3465 } 3466 3467 #undef __FUNCT__ 3468 #define __FUNCT__ "MatCreateMPIAIJ" 3469 /*@C 3470 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3471 (the default parallel PETSc format). For good matrix assembly performance 3472 the user should preallocate the matrix storage by setting the parameters 3473 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3474 performance can be increased by more than a factor of 50. 3475 3476 Collective on MPI_Comm 3477 3478 Input Parameters: 3479 + comm - MPI communicator 3480 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3481 This value should be the same as the local size used in creating the 3482 y vector for the matrix-vector product y = Ax. 3483 . n - This value should be the same as the local size used in creating the 3484 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3485 calculated if N is given) For square matrices n is almost always m. 3486 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3487 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3488 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3489 (same value is used for all local rows) 3490 . d_nnz - array containing the number of nonzeros in the various rows of the 3491 DIAGONAL portion of the local submatrix (possibly different for each row) 3492 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3493 The size of this array is equal to the number of local rows, i.e 'm'. 3494 You must leave room for the diagonal entry even if it is zero. 3495 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3496 submatrix (same value is used for all local rows). 
3497 - o_nnz - array containing the number of nonzeros in the various rows of the 3498 OFF-DIAGONAL portion of the local submatrix (possibly different for 3499 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3500 structure. The size of this array is equal to the number 3501 of local rows, i.e 'm'. 3502 3503 Output Parameter: 3504 . A - the matrix 3505 3506 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3507 MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely 3508 true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles. 3509 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3510 3511 Notes: 3512 If the *_nnz parameter is given then the *_nz parameter is ignored 3513 3514 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3515 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3516 storage requirements for this matrix. 3517 3518 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3519 processor than it must be used on all processors that share the object for 3520 that argument. 3521 3522 The user MUST specify either the local or global matrix dimensions 3523 (possibly both). 3524 3525 The parallel matrix is partitioned across processors such that the 3526 first m0 rows belong to process 0, the next m1 rows belong to 3527 process 1, the next m2 rows belong to process 2 etc.. where 3528 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3529 values corresponding to [m x N] submatrix. 3530 3531 The columns are logically partitioned with the n0 columns belonging 3532 to 0th partition, the next n1 columns belonging to the next 3533 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 3534 3535 The DIAGONAL portion of the local submatrix on any given processor 3536 is the submatrix corresponding to the rows and columns m,n 3537 corresponding to the given processor. i.e diagonal matrix on 3538 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3539 etc. The remaining portion of the local submatrix [m x (N-n)] 3540 constitute the OFF-DIAGONAL portion. The example below better 3541 illustrates this concept. 3542 3543 For a square global matrix we define each processor's diagonal portion 3544 to be its local rows and the corresponding columns (a square submatrix); 3545 each processor's off-diagonal portion encompasses the remainder of the 3546 local matrix (a rectangular submatrix). 3547 3548 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3549 3550 When calling this routine with a single process communicator, a matrix of 3551 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3552 type of communicator, use the construction mechanism: 3553 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 3554 3555 By default, this format uses inodes (identical nodes) when possible. 3556 We search for consecutive rows with the same nonzero structure, thereby 3557 reusing matrix information to achieve increased efficiency. 3558 3559 Options Database Keys: 3560 + -mat_no_inode - Do not use inodes 3561 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3562 - -mat_aij_oneindex - Internally use indexing starting at 1 3563 rather than 0. Note that when calling MatSetValues(), 3564 the user still MUST index entries starting at 0! 
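   As a sketch of the recommended MatCreate()/MatSetType()/MatMPIAIJSetPreallocation()
   sequence mentioned above (the sizes m,n,M,N and the nonzero estimates d_nnz,o_nnz are
   placeholders the caller must supply):

.vb
   Mat A;
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
   ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
   /* ... MatSetValues() and MatAssemblyBegin()/MatAssemblyEnd() as usual ... */
.ve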
3565 3566 3567 Example usage: 3568 3569 Consider the following 8x8 matrix with 34 non-zero values, that is 3570 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3571 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3572 as follows: 3573 3574 .vb 3575 1 2 0 | 0 3 0 | 0 4 3576 Proc0 0 5 6 | 7 0 0 | 8 0 3577 9 0 10 | 11 0 0 | 12 0 3578 ------------------------------------- 3579 13 0 14 | 15 16 17 | 0 0 3580 Proc1 0 18 0 | 19 20 21 | 0 0 3581 0 0 0 | 22 23 0 | 24 0 3582 ------------------------------------- 3583 Proc2 25 26 27 | 0 0 28 | 29 0 3584 30 0 0 | 31 32 33 | 0 34 3585 .ve 3586 3587 This can be represented as a collection of submatrices as: 3588 3589 .vb 3590 A B C 3591 D E F 3592 G H I 3593 .ve 3594 3595 Where the submatrices A,B,C are owned by proc0, D,E,F are 3596 owned by proc1, G,H,I are owned by proc2. 3597 3598 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3599 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3600 The 'M','N' parameters are 8,8, and have the same values on all procs. 3601 3602 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3603 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3604 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3605 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3606 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3607 matrix, ans [DF] as another SeqAIJ matrix. 3608 3609 When d_nz, o_nz parameters are specified, d_nz storage elements are 3610 allocated for every row of the local diagonal submatrix, and o_nz 3611 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3612 One way to choose d_nz and o_nz is to use the max nonzerors per local 3613 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3614 In this case, the values of d_nz,o_nz are: 3615 .vb 3616 proc0 : dnz = 2, o_nz = 2 3617 proc1 : dnz = 3, o_nz = 2 3618 proc2 : dnz = 1, o_nz = 4 3619 .ve 3620 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3621 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3622 for proc3. i.e we are using 12+15+10=37 storage locations to store 3623 34 values. 3624 3625 When d_nnz, o_nnz parameters are specified, the storage is specified 3626 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3627 In the above case the values for d_nnz,o_nnz are: 3628 .vb 3629 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3630 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3631 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3632 .ve 3633 Here the space allocated is sum of all the above values i.e 34, and 3634 hence pre-allocation is perfect. 
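   For proc0 in this example, the corresponding call would therefore look roughly like the
   sketch below (each process passes its own local arrays; d_nz and o_nz are passed as 0
   since the d_nnz/o_nnz arrays are provided and take precedence):

.vb
   PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};  /* the proc0 values listed above */
   ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve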
3635 3636 Level: intermediate 3637 3638 .keywords: matrix, aij, compressed row, sparse, parallel 3639 3640 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3641 MPIAIJ, MatCreateMPIAIJWithArrays() 3642 @*/ 3643 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3644 { 3645 PetscErrorCode ierr; 3646 PetscMPIInt size; 3647 3648 PetscFunctionBegin; 3649 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3650 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3651 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3652 if (size > 1) { 3653 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3654 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3655 } else { 3656 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3657 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3658 } 3659 PetscFunctionReturn(0); 3660 } 3661 3662 #undef __FUNCT__ 3663 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3664 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3665 { 3666 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3667 3668 PetscFunctionBegin; 3669 *Ad = a->A; 3670 *Ao = a->B; 3671 *colmap = a->garray; 3672 PetscFunctionReturn(0); 3673 } 3674 3675 #undef __FUNCT__ 3676 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3677 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3678 { 3679 PetscErrorCode ierr; 3680 PetscInt i; 3681 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3682 3683 PetscFunctionBegin; 3684 if (coloring->ctype == IS_COLORING_GLOBAL) { 3685 ISColoringValue *allcolors,*colors; 3686 ISColoring ocoloring; 3687 3688 /* set coloring for diagonal portion */ 3689 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3690 3691 /* set coloring for off-diagonal portion */ 3692 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3693 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3694 for (i=0; i<a->B->cmap->n; i++) { 3695 colors[i] = allcolors[a->garray[i]]; 3696 } 3697 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3698 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3699 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3700 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3701 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3702 ISColoringValue *colors; 3703 PetscInt *larray; 3704 ISColoring ocoloring; 3705 3706 /* set coloring for diagonal portion */ 3707 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3708 for (i=0; i<a->A->cmap->n; i++) { 3709 larray[i] = i + A->cmap->rstart; 3710 } 3711 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3712 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3713 for (i=0; i<a->A->cmap->n; i++) { 3714 colors[i] = coloring->colors[larray[i]]; 3715 } 3716 ierr = PetscFree(larray);CHKERRQ(ierr); 3717 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3718 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3719 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3720 3721 /* set coloring for off-diagonal portion */ 3722 ierr = 
PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3723 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3724 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3725 for (i=0; i<a->B->cmap->n; i++) { 3726 colors[i] = coloring->colors[larray[i]]; 3727 } 3728 ierr = PetscFree(larray);CHKERRQ(ierr); 3729 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3730 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3731 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3732 } else { 3733 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3734 } 3735 3736 PetscFunctionReturn(0); 3737 } 3738 3739 #if defined(PETSC_HAVE_ADIC) 3740 #undef __FUNCT__ 3741 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3742 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3743 { 3744 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3745 PetscErrorCode ierr; 3746 3747 PetscFunctionBegin; 3748 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3749 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3750 PetscFunctionReturn(0); 3751 } 3752 #endif 3753 3754 #undef __FUNCT__ 3755 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3756 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3757 { 3758 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3759 PetscErrorCode ierr; 3760 3761 PetscFunctionBegin; 3762 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3763 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3764 PetscFunctionReturn(0); 3765 } 3766 3767 #undef __FUNCT__ 3768 #define __FUNCT__ "MatMerge" 3769 /*@ 3770 MatMerge - Creates a single large PETSc matrix by concatinating sequential 3771 matrices from each processor 3772 3773 Collective on MPI_Comm 3774 3775 Input Parameters: 3776 + comm - the communicators the parallel matrix will live on 3777 . inmat - the input sequential matrices 3778 . n - number of local columns (or PETSC_DECIDE) 3779 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3780 3781 Output Parameter: 3782 . outmat - the parallel matrix generated 3783 3784 Level: advanced 3785 3786 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
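   A typical use is sketched below; Aseq stands for whatever sequential matrix each process
   has assembled locally and is an illustrative name only. Note that, as the implementation
   below shows, the input sequential matrix is destroyed by this routine.

.vb
   Mat Aseq,Apar;
   /* each process assembles its own sequential matrix Aseq; all must have the same number of columns */
   ierr = MatMerge(comm,Aseq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Apar);CHKERRQ(ierr);
.ve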
3787 3788 @*/ 3789 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3790 { 3791 PetscErrorCode ierr; 3792 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3793 PetscInt *indx; 3794 PetscScalar *values; 3795 3796 PetscFunctionBegin; 3797 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3798 if (scall == MAT_INITIAL_MATRIX){ 3799 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3800 if (n == PETSC_DECIDE){ 3801 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3802 } 3803 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3804 rstart -= m; 3805 3806 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3807 for (i=0;i<m;i++) { 3808 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3809 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3810 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3811 } 3812 /* This routine will ONLY return MPIAIJ type matrix */ 3813 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3814 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3815 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3816 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3817 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3818 3819 } else if (scall == MAT_REUSE_MATRIX){ 3820 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3821 } else { 3822 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3823 } 3824 3825 for (i=0;i<m;i++) { 3826 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3827 Ii = i + rstart; 3828 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3829 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3830 } 3831 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3832 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3833 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3834 3835 PetscFunctionReturn(0); 3836 } 3837 3838 #undef __FUNCT__ 3839 #define __FUNCT__ "MatFileSplit" 3840 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3841 { 3842 PetscErrorCode ierr; 3843 PetscMPIInt rank; 3844 PetscInt m,N,i,rstart,nnz; 3845 size_t len; 3846 const PetscInt *indx; 3847 PetscViewer out; 3848 char *name; 3849 Mat B; 3850 const PetscScalar *values; 3851 3852 PetscFunctionBegin; 3853 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3854 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3855 /* Should this be the type of the diagonal block of A? 
*/ 3856 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3857 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3858 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3859 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3860 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3861 for (i=0;i<m;i++) { 3862 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3863 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3864 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3865 } 3866 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3867 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3868 3869 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 3870 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3871 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3872 sprintf(name,"%s.%d",outfile,rank); 3873 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3874 ierr = PetscFree(name); 3875 ierr = MatView(B,out);CHKERRQ(ierr); 3876 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3877 ierr = MatDestroy(B);CHKERRQ(ierr); 3878 PetscFunctionReturn(0); 3879 } 3880 3881 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3882 #undef __FUNCT__ 3883 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3884 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3885 { 3886 PetscErrorCode ierr; 3887 Mat_Merge_SeqsToMPI *merge; 3888 PetscContainer container; 3889 3890 PetscFunctionBegin; 3891 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3892 if (container) { 3893 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3894 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3895 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3896 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3897 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3898 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3899 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3900 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3901 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3902 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3903 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3904 ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr); 3905 3906 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 3907 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3908 } 3909 ierr = PetscFree(merge);CHKERRQ(ierr); 3910 3911 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3912 PetscFunctionReturn(0); 3913 } 3914 3915 #include "src/mat/utils/freespace.h" 3916 #include "petscbt.h" 3917 3918 #undef __FUNCT__ 3919 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 3920 /*@C 3921 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 3922 matrices from each processor 3923 3924 Collective on MPI_Comm 3925 3926 Input Parameters: 3927 + comm - the communicators the parallel matrix will live on 3928 . seqmat - the input sequential matrices 3929 . m - number of local rows (or PETSC_DECIDE) 3930 . n - number of local columns (or PETSC_DECIDE) 3931 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3932 3933 Output Parameter: 3934 . mpimat - the parallel matrix generated 3935 3936 Level: advanced 3937 3938 Notes: 3939 The dimensions of the sequential matrix in each processor MUST be the same. 3940 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 3941 destroyed when mpimat is destroyed. 
Call PetscObjectQuery() to access seqmat. 3942 @*/ 3943 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 3944 { 3945 PetscErrorCode ierr; 3946 MPI_Comm comm=((PetscObject)mpimat)->comm; 3947 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3948 PetscMPIInt size,rank,taga,*len_s; 3949 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 3950 PetscInt proc,m; 3951 PetscInt **buf_ri,**buf_rj; 3952 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3953 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3954 MPI_Request *s_waits,*r_waits; 3955 MPI_Status *status; 3956 MatScalar *aa=a->a; 3957 MatScalar **abuf_r,*ba_i; 3958 Mat_Merge_SeqsToMPI *merge; 3959 PetscContainer container; 3960 3961 PetscFunctionBegin; 3962 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3963 3964 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3965 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3966 3967 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3968 if (container) { 3969 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3970 } 3971 bi = merge->bi; 3972 bj = merge->bj; 3973 buf_ri = merge->buf_ri; 3974 buf_rj = merge->buf_rj; 3975 3976 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3977 owners = merge->rowmap.range; 3978 len_s = merge->len_s; 3979 3980 /* send and recv matrix values */ 3981 /*-----------------------------*/ 3982 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3983 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3984 3985 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 3986 for (proc=0,k=0; proc<size; proc++){ 3987 if (!len_s[proc]) continue; 3988 i = owners[proc]; 3989 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 3990 k++; 3991 } 3992 3993 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 3994 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 3995 ierr = PetscFree(status);CHKERRQ(ierr); 3996 3997 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3998 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3999 4000 /* insert mat values of mpimat */ 4001 /*----------------------------*/ 4002 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4003 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 4004 nextrow = buf_ri_k + merge->nrecv; 4005 nextai = nextrow + merge->nrecv; 4006 4007 for (k=0; k<merge->nrecv; k++){ 4008 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4009 nrows = *(buf_ri_k[k]); 4010 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4011 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4012 } 4013 4014 /* set values of ba */ 4015 m = merge->rowmap.n; 4016 for (i=0; i<m; i++) { 4017 arow = owners[rank] + i; 4018 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4019 bnzi = bi[i+1] - bi[i]; 4020 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4021 4022 /* add local non-zero vals of this proc's seqmat into ba */ 4023 anzi = ai[arow+1] - ai[arow]; 4024 aj = a->j + ai[arow]; 4025 aa = a->a + ai[arow]; 4026 nextaj = 0; 4027 for (j=0; nextaj<anzi; j++){ 4028 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4029 ba_i[j] += aa[nextaj++]; 4030 } 4031 } 4032 4033 /* add 
received vals into ba */ 4034 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4035 /* i-th row */ 4036 if (i == *nextrow[k]) { 4037 anzi = *(nextai[k]+1) - *nextai[k]; 4038 aj = buf_rj[k] + *(nextai[k]); 4039 aa = abuf_r[k] + *(nextai[k]); 4040 nextaj = 0; 4041 for (j=0; nextaj<anzi; j++){ 4042 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4043 ba_i[j] += aa[nextaj++]; 4044 } 4045 } 4046 nextrow[k]++; nextai[k]++; 4047 } 4048 } 4049 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4050 } 4051 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4052 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4053 4054 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4055 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4056 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 4057 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4058 PetscFunctionReturn(0); 4059 } 4060 4061 #undef __FUNCT__ 4062 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4063 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4064 { 4065 PetscErrorCode ierr; 4066 Mat B_mpi; 4067 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4068 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4069 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4070 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4071 PetscInt len,proc,*dnz,*onz; 4072 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4073 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4074 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4075 MPI_Status *status; 4076 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4077 PetscBT lnkbt; 4078 Mat_Merge_SeqsToMPI *merge; 4079 PetscContainer container; 4080 4081 PetscFunctionBegin; 4082 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4083 4084 /* make sure it is a PETSc comm */ 4085 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4086 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4087 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4088 4089 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4090 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4091 4092 /* determine row ownership */ 4093 /*---------------------------------------------------------*/ 4094 ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 4095 merge->rowmap.n = m; 4096 merge->rowmap.N = M; 4097 merge->rowmap.bs = 1; 4098 ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr); 4099 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4100 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4101 4102 m = merge->rowmap.n; 4103 M = merge->rowmap.N; 4104 owners = merge->rowmap.range; 4105 4106 /* determine the number of messages to send, their lengths */ 4107 /*---------------------------------------------------------*/ 4108 len_s = merge->len_s; 4109 4110 len = 0; /* length of buf_si[] */ 4111 merge->nsend = 0; 4112 for (proc=0; proc<size; proc++){ 4113 len_si[proc] = 0; 4114 if (proc == rank){ 4115 len_s[proc] = 0; 4116 } else { 4117 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4118 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4119 } 4120 if (len_s[proc]) { 4121 merge->nsend++; 4122 nrows = 0; 4123 for (i=owners[proc]; i<owners[proc+1]; i++){ 4124 if (ai[i+1] > ai[i]) nrows++; 4125 } 4126 len_si[proc] = 2*(nrows+1); 4127 len += 
len_si[proc]; 4128 } 4129 } 4130 4131 /* determine the number and length of messages to receive for ij-structure */ 4132 /*-------------------------------------------------------------------------*/ 4133 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4134 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4135 4136 /* post the Irecv of j-structure */ 4137 /*-------------------------------*/ 4138 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4139 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4140 4141 /* post the Isend of j-structure */ 4142 /*--------------------------------*/ 4143 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 4144 sj_waits = si_waits + merge->nsend; 4145 4146 for (proc=0, k=0; proc<size; proc++){ 4147 if (!len_s[proc]) continue; 4148 i = owners[proc]; 4149 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4150 k++; 4151 } 4152 4153 /* receives and sends of j-structure are complete */ 4154 /*------------------------------------------------*/ 4155 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4156 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4157 4158 /* send and recv i-structure */ 4159 /*---------------------------*/ 4160 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4161 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4162 4163 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4164 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4165 for (proc=0,k=0; proc<size; proc++){ 4166 if (!len_s[proc]) continue; 4167 /* form outgoing message for i-structure: 4168 buf_si[0]: nrows to be sent 4169 [1:nrows]: row index (global) 4170 [nrows+1:2*nrows+1]: i-structure index 4171 */ 4172 /*-------------------------------------------*/ 4173 nrows = len_si[proc]/2 - 1; 4174 buf_si_i = buf_si + nrows+1; 4175 buf_si[0] = nrows; 4176 buf_si_i[0] = 0; 4177 nrows = 0; 4178 for (i=owners[proc]; i<owners[proc+1]; i++){ 4179 anzi = ai[i+1] - ai[i]; 4180 if (anzi) { 4181 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4182 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4183 nrows++; 4184 } 4185 } 4186 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4187 k++; 4188 buf_si += len_si[proc]; 4189 } 4190 4191 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4192 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4193 4194 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4195 for (i=0; i<merge->nrecv; i++){ 4196 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4197 } 4198 4199 ierr = PetscFree(len_si);CHKERRQ(ierr); 4200 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4201 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4202 ierr = PetscFree(si_waits);CHKERRQ(ierr); 4203 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4204 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4205 ierr = PetscFree(status);CHKERRQ(ierr); 4206 4207 /* compute a local seq matrix in each processor */ 4208 /*----------------------------------------------*/ 4209 /* allocate bi array and 
free space for accumulating nonzero column info */ 4210 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4211 bi[0] = 0; 4212 4213 /* create and initialize a linked list */ 4214 nlnk = N+1; 4215 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4216 4217 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4218 len = 0; 4219 len = ai[owners[rank+1]] - ai[owners[rank]]; 4220 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4221 current_space = free_space; 4222 4223 /* determine symbolic info for each local row */ 4224 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 4225 nextrow = buf_ri_k + merge->nrecv; 4226 nextai = nextrow + merge->nrecv; 4227 for (k=0; k<merge->nrecv; k++){ 4228 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4229 nrows = *buf_ri_k[k]; 4230 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4231 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4232 } 4233 4234 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4235 len = 0; 4236 for (i=0;i<m;i++) { 4237 bnzi = 0; 4238 /* add local non-zero cols of this proc's seqmat into lnk */ 4239 arow = owners[rank] + i; 4240 anzi = ai[arow+1] - ai[arow]; 4241 aj = a->j + ai[arow]; 4242 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4243 bnzi += nlnk; 4244 /* add received col data into lnk */ 4245 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4246 if (i == *nextrow[k]) { /* i-th row */ 4247 anzi = *(nextai[k]+1) - *nextai[k]; 4248 aj = buf_rj[k] + *nextai[k]; 4249 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4250 bnzi += nlnk; 4251 nextrow[k]++; nextai[k]++; 4252 } 4253 } 4254 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4255 4256 /* if free space is not available, make more free space */ 4257 if (current_space->local_remaining<bnzi) { 4258 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4259 nspacedouble++; 4260 } 4261 /* copy data into free space, then initialize lnk */ 4262 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4263 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4264 4265 current_space->array += bnzi; 4266 current_space->local_used += bnzi; 4267 current_space->local_remaining -= bnzi; 4268 4269 bi[i+1] = bi[i] + bnzi; 4270 } 4271 4272 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 4273 4274 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4275 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4276 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4277 4278 /* create symbolic parallel matrix B_mpi */ 4279 /*---------------------------------------*/ 4280 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4281 if (n==PETSC_DECIDE) { 4282 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4283 } else { 4284 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4285 } 4286 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4287 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4288 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4289 4290 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4291 B_mpi->assembled = PETSC_FALSE; 4292 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4293 merge->bi = bi; 4294 merge->bj = bj; 4295 merge->buf_ri = buf_ri; 4296 merge->buf_rj = 
buf_rj; 4297 merge->coi = PETSC_NULL; 4298 merge->coj = PETSC_NULL; 4299 merge->owners_co = PETSC_NULL; 4300 4301 /* attach the supporting struct to B_mpi for reuse */ 4302 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4303 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4304 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4305 *mpimat = B_mpi; 4306 4307 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4308 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4309 PetscFunctionReturn(0); 4310 } 4311 4312 #undef __FUNCT__ 4313 #define __FUNCT__ "MatMerge_SeqsToMPI" 4314 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4315 { 4316 PetscErrorCode ierr; 4317 4318 PetscFunctionBegin; 4319 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4320 if (scall == MAT_INITIAL_MATRIX){ 4321 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4322 } 4323 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4324 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4325 PetscFunctionReturn(0); 4326 } 4327 4328 #undef __FUNCT__ 4329 #define __FUNCT__ "MatGetLocalMat" 4330 /*@ 4331 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4332 4333 Not Collective 4334 4335 Input Parameters: 4336 + A - the matrix 4337 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4338 4339 Output Parameter: 4340 . A_loc - the local sequential matrix generated 4341 4342 Level: developer 4343 4344 @*/ 4345 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4346 { 4347 PetscErrorCode ierr; 4348 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4349 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4350 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4351 MatScalar *aa=a->a,*ba=b->a,*cam; 4352 PetscScalar *ca; 4353 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4354 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4355 4356 PetscFunctionBegin; 4357 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4358 if (scall == MAT_INITIAL_MATRIX){ 4359 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4360 ci[0] = 0; 4361 for (i=0; i<am; i++){ 4362 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4363 } 4364 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4365 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4366 k = 0; 4367 for (i=0; i<am; i++) { 4368 ncols_o = bi[i+1] - bi[i]; 4369 ncols_d = ai[i+1] - ai[i]; 4370 /* off-diagonal portion of A */ 4371 for (jo=0; jo<ncols_o; jo++) { 4372 col = cmap[*bj]; 4373 if (col >= cstart) break; 4374 cj[k] = col; bj++; 4375 ca[k++] = *ba++; 4376 } 4377 /* diagonal portion of A */ 4378 for (j=0; j<ncols_d; j++) { 4379 cj[k] = cstart + *aj++; 4380 ca[k++] = *aa++; 4381 } 4382 /* off-diagonal portion of A */ 4383 for (j=jo; j<ncols_o; j++) { 4384 cj[k] = cmap[*bj++]; 4385 ca[k++] = *ba++; 4386 } 4387 } 4388 /* put together the new matrix */ 4389 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4390 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4391 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 4392 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4393 mat->free_a = PETSC_TRUE; 4394 mat->free_ij = PETSC_TRUE; 4395 mat->nonew = 0; 4396 } else if (scall == MAT_REUSE_MATRIX){ 4397 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4398 ci = mat->i; cj = mat->j; cam = mat->a; 4399 for (i=0; i<am; i++) { 4400 /* off-diagonal portion of A */ 4401 ncols_o = bi[i+1] - bi[i]; 4402 for (jo=0; jo<ncols_o; jo++) { 4403 col = cmap[*bj]; 4404 if (col >= cstart) break; 4405 *cam++ = *ba++; bj++; 4406 } 4407 /* diagonal portion of A */ 4408 ncols_d = ai[i+1] - ai[i]; 4409 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4410 /* off-diagonal portion of A */ 4411 for (j=jo; j<ncols_o; j++) { 4412 *cam++ = *ba++; bj++; 4413 } 4414 } 4415 } else { 4416 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4417 } 4418 4419 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4420 PetscFunctionReturn(0); 4421 } 4422 4423 #undef __FUNCT__ 4424 #define __FUNCT__ "MatGetLocalMatCondensed" 4425 /*@C 4426 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4427 4428 Not Collective 4429 4430 Input Parameters: 4431 + A - the matrix 4432 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4433 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4434 4435 Output Parameter: 4436 . A_loc - the local sequential matrix generated 4437 4438 Level: developer 4439 4440 @*/ 4441 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4442 { 4443 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4444 PetscErrorCode ierr; 4445 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4446 IS isrowa,iscola; 4447 Mat *aloc; 4448 4449 PetscFunctionBegin; 4450 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4451 if (!row){ 4452 start = A->rmap->rstart; end = A->rmap->rend; 4453 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4454 } else { 4455 isrowa = *row; 4456 } 4457 if (!col){ 4458 start = A->cmap->rstart; 4459 cmap = a->garray; 4460 nzA = a->A->cmap->n; 4461 nzB = a->B->cmap->n; 4462 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4463 ncols = 0; 4464 for (i=0; i<nzB; i++) { 4465 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4466 else break; 4467 } 4468 imark = i; 4469 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4470 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4471 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4472 ierr = PetscFree(idx);CHKERRQ(ierr); 4473 } else { 4474 iscola = *col; 4475 } 4476 if (scall != MAT_INITIAL_MATRIX){ 4477 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4478 aloc[0] = *A_loc; 4479 } 4480 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4481 *A_loc = aloc[0]; 4482 ierr = PetscFree(aloc);CHKERRQ(ierr); 4483 if (!row){ 4484 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4485 } 4486 if (!col){ 4487 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4488 } 4489 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4490 PetscFunctionReturn(0); 4491 } 4492 4493 #undef __FUNCT__ 4494 #define __FUNCT__ "MatGetBrowsOfAcols" 4495 /*@C 4496 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4497 4498 Collective on Mat 4499 4500 Input Parameters: 4501 + A,B - the matrices in mpiaij format 4502 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4503 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4504 4505 Output Parameter: 4506 + rowb, colb - index sets of rows and columns of B to extract 4507 . brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows 4508 - B_seq - the sequential matrix generated 4509 4510 Level: developer 4511 4512 @*/ 4513 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4514 { 4515 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4516 PetscErrorCode ierr; 4517 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4518 IS isrowb,iscolb; 4519 Mat *bseq; 4520 4521 PetscFunctionBegin; 4522 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4523 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4524 } 4525 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4526 4527 if (scall == MAT_INITIAL_MATRIX){ 4528 start = A->cmap->rstart; 4529 cmap = a->garray; 4530 nzA = a->A->cmap->n; 4531 nzB = a->B->cmap->n; 4532 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4533 ncols = 0; 4534 for (i=0; i<nzB; i++) { /* row < local row index */ 4535 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4536 else break; 4537 } 4538 imark = i; 4539 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4540 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4541 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4542 ierr = PetscFree(idx);CHKERRQ(ierr); 4543 *brstart = imark; 4544 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4545 } else { 4546 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4547 isrowb = *rowb; iscolb = *colb; 4548 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4549 bseq[0] = *B_seq; 4550 } 4551 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4552 *B_seq = bseq[0]; 4553 ierr = PetscFree(bseq);CHKERRQ(ierr); 4554 if (!rowb){ 4555 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4556 } else { 4557 *rowb = isrowb; 4558 } 4559 if (!colb){ 4560 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4561 } else { 4562 *colb = iscolb; 4563 } 4564 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4565 PetscFunctionReturn(0); 4566 } 4567 4568 #undef __FUNCT__ 4569 #define __FUNCT__ "MatGetBrowsOfAoCols" 4570 /*@C 4571 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4572 of the OFF-DIAGONAL portion of local A 4573 4574 Collective on Mat 4575 4576 Input Parameters: 4577 + A,B - the matrices in mpiaij format 4578 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4579 . 
startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4580 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4581 4582 Output Parameter: 4583 + B_oth - the sequential matrix generated 4584 4585 Level: developer 4586 4587 @*/ 4588 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth) 4589 { 4590 VecScatter_MPI_General *gen_to,*gen_from; 4591 PetscErrorCode ierr; 4592 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4593 Mat_SeqAIJ *b_oth; 4594 VecScatter ctx=a->Mvctx; 4595 MPI_Comm comm=((PetscObject)ctx)->comm; 4596 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4597 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4598 PetscScalar *rvalues,*svalues; 4599 MatScalar *b_otha,*bufa,*bufA; 4600 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4601 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4602 MPI_Status *sstatus,rstatus; 4603 PetscMPIInt jj; 4604 PetscInt *cols,sbs,rbs; 4605 PetscScalar *vals; 4606 4607 PetscFunctionBegin; 4608 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4609 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4610 } 4611 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4612 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4613 4614 gen_to = (VecScatter_MPI_General*)ctx->todata; 4615 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4616 rvalues = gen_from->values; /* holds the length of receiving row */ 4617 svalues = gen_to->values; /* holds the length of sending row */ 4618 nrecvs = gen_from->n; 4619 nsends = gen_to->n; 4620 4621 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4622 srow = gen_to->indices; /* local row index to be sent */ 4623 sstarts = gen_to->starts; 4624 sprocs = gen_to->procs; 4625 sstatus = gen_to->sstatus; 4626 sbs = gen_to->bs; 4627 rstarts = gen_from->starts; 4628 rprocs = gen_from->procs; 4629 rbs = gen_from->bs; 4630 4631 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4632 if (scall == MAT_INITIAL_MATRIX){ 4633 /* i-array */ 4634 /*---------*/ 4635 /* post receives */ 4636 for (i=0; i<nrecvs; i++){ 4637 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4638 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4639 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4640 } 4641 4642 /* pack the outgoing message */ 4643 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 4644 rstartsj = sstartsj + nsends +1; 4645 sstartsj[0] = 0; rstartsj[0] = 0; 4646 len = 0; /* total length of j or a array to be sent */ 4647 k = 0; 4648 for (i=0; i<nsends; i++){ 4649 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4650 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4651 for (j=0; j<nrows; j++) { 4652 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4653 for (l=0; l<sbs; l++){ 4654 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4655 rowlen[j*sbs+l] = ncols; 4656 len += ncols; 4657 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4658 } 4659 k++; 4660 } 4661 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4662 
      sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
    b_othi[0] = 0;
    len = 0; /* total length of j or a array to be received */
    k   = 0;
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++){
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++){
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++){
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++){
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX){
    sstartsj = *startsj;
    rstartsj = sstartsj + nsends + 1;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "scall must be either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
  }

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++){
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++){
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++){
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++){
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj || !bufa_ptr){
      ierr = PetscFree(sstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
    } else {
      *startsj  = sstartsj;
      *bufa_ptr = bufa;
    }
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
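
/*
   A minimal usage sketch (excluded from compilation) for the routine above, keeping startsj and
   bufa between calls so that a later MAT_REUSE_MATRIX call only re-communicates the numerical
   values.  A and B are assumed to be assembled MPIAIJ matrices with A's column layout matching
   B's row layout; ExampleGetBrowsOfAoCols is a hypothetical name.
*/
#if 0
static PetscErrorCode ExampleGetBrowsOfAoCols(Mat A,Mat B)
{
  PetscErrorCode ierr;
  PetscInt       *startsj = PETSC_NULL;
  MatScalar      *bufa    = PETSC_NULL;
  Mat            B_oth;

  PetscFunctionBegin;
  /* first call: sets up the communication and creates B_oth, startsj and bufa */
  ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
  /* ... the values of B change, its nonzero pattern does not ... */
  /* later calls: reuse B_oth and the saved communication buffers */
  ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);

  ierr = PetscFree(startsj);CHKERRQ(ierr);
  ierr = PetscFree(bufa);CHKERRQ(ierr);
  ierr = MatDestroy(B_oth);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif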

#undef __FUNCT__
#define __FUNCT__ "MatGetCommunicationStructs"
/*@C
  MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

  Not Collective

  Input Parameter:
. A - The matrix in mpiaij format

  Output Parameters:
+ lvec - The local vector holding off-process values from the argument to a matrix-vector product
. colmap - A map from global column index to local index into lvec
- multScatter - A scatter from the argument of a matrix-vector product to lvec

  Level: developer

@*/
#if defined (PETSC_USE_CTABLE)
PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ *) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
EXTERN_C_END

#include "src/mat/impls/dense/mpi/mpidense.h"

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
/*
    Computes (B'*A')' since computing B*A directly is untenable

              n          p           p
            (   )      (   )       (   )
      m   ( A )   *  n ( B )  =  m ( C )
            (   )      (   )       (   )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(At);CHKERRQ(ierr);
  ierr = MatDestroy(Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
  ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C = Cmat;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
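
/*
   A minimal usage sketch (excluded from compilation): the dense*AIJ product implemented above is
   intended to be reached through the public MatMatMult() interface via the function composition
   performed in MatCreate_MPIAIJ() below.  A is assumed to be an assembled MPIDENSE matrix and B an
   assembled MPIAIJ matrix with compatible layouts; ExampleDenseTimesAIJ is a hypothetical name.
*/
#if 0
static PetscErrorCode ExampleDenseTimesAIJ(Mat A,Mat B)
{
  PetscErrorCode ierr;
  Mat            C;

  PetscFunctionBegin;
  /* symbolic + numeric product C = A*B; the fill estimate is not used by the symbolic phase above */
  ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
  /* if only the values of A or B change, the same C can be recomputed in place */
  ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
  ierr = MatDestroy(C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif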

EXTERN_C_BEGIN
#if defined(PETSC_HAVE_MUMPS)
extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SPOOLES)
extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
#endif
EXTERN_C_END

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJ()
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);

  ierr          = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data       = (void*)b;
  ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->rmap->bs   = 1;
  B->assembled  = PETSC_FALSE;
  B->mapping    = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_spooles_C",
                                           "MatGetFactor_mpiaij_spooles",
                                           MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_mumps_C",
                                           "MatGetFactor_mpiaij_mumps",
                                           MatGetFactor_mpiaij_mumps);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_superlu_dist_C",
                                           "MatGetFactor_mpiaij_superlu_dist",
                                           MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPIAIJ",
                                           MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPIAIJ",
                                           MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIAIJ",
                                           MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                           "MatIsTranspose_MPIAIJ",
                                           MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                           "MatMPIAIJSetPreallocation_MPIAIJ",
                                           MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
                                           "MatMPIAIJSetPreallocationCSR_MPIAIJ",
                                           MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                           "MatDiagonalScaleLocal_MPIAIJ",
                                           MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
                                           "MatConvert_MPIAIJ_MPICSRPERM",
                                           MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
                                           "MatConvert_MPIAIJ_MPICRL",
                                           MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
                                           "MatMatMult_MPIDense_MPIAIJ",
                                           MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
                                           "MatMatMultSymbolic_MPIDense_MPIAIJ",
                                           MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
                                           "MatMatMultNumeric_MPIDense_MPIAIJ",
                                           MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
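
/*
   A minimal usage sketch (excluded from compilation) of creating an "mpiaij" matrix through the
   public interface, which ends up in MatCreate_MPIAIJ() above.  The preallocation counts (3
   diagonal / 2 off-diagonal nonzeros per row) and the unit diagonal are placeholder values for
   the illustration; ExampleCreateMPIAIJ is a hypothetical name.
*/
#if 0
static PetscErrorCode ExampleCreateMPIAIJ(MPI_Comm comm,PetscInt n,PetscInt N,Mat *A)
{
  PetscErrorCode ierr;
  PetscInt       i,rstart,rend;
  PetscScalar    one = 1.0;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,n,n,N,N);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); /* or -mat_type mpiaij with MatSetFromOptions() */
  ierr = MatMPIAIJSetPreallocation(*A,3,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*A,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) { /* put something on the diagonal */
    ierr = MatSetValues(*A,1,&i,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif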

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@
     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices for "diagonal" portion of matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.

       The i and j indices are 0 based

       See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix


.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
                                                                 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (oi[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  }
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;
  maij->donotstash     = PETSC_TRUE;
  (*mat)->preallocated = PETSC_TRUE;

  (*mat)->rmap->bs = (*mat)->cmap->bs = 1;
  ierr = PetscMapSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscMapSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
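
/*
   A minimal usage sketch (excluded from compilation) for the routine above, assembling a 4x4
   matrix on exactly two processes.  All sizes and values are invented for the illustration.
   Because the arrays are not copied, they are static here so they stay valid for the life of the
   matrix.  Since the off-diagonal block is created above with the full global column count, oj
   holds global column indices while j holds indices local to the diagonal block;
   ExampleSplitArrays is a hypothetical name.
*/
#if 0
static PetscErrorCode ExampleSplitArrays(MPI_Comm comm,Mat *mat)
{
  /* rank 0 owns rows 0-1 and columns 0-1; rank 1 owns rows 2-3 and columns 2-3 */
  static PetscInt    i0[]  = {0,2,3},  j0[]  = {0,1,1};  /* diagonal block, local columns      */
  static PetscScalar a0[]  = {1.0,2.0,4.0};
  static PetscInt    oi0[] = {0,1,2},  oj0[] = {3,2};    /* off-diagonal block, global columns */
  static PetscScalar oa0[] = {3.0,5.0};
  static PetscInt    i1[]  = {0,1,2},  j1[]  = {0,1};
  static PetscScalar a1[]  = {6.0,8.0};
  static PetscInt    oi1[] = {0,1,2},  oj1[] = {0,1};
  static PetscScalar oa1[] = {7.0,9.0};
  PetscMPIInt        rank;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = MatCreateMPIAIJWithSplitArrays(comm,2,2,4,4,i0,j0,a0,oi0,oj0,oa0,mat);CHKERRQ(ierr);
  } else {
    ierr = MatCreateMPIAIJWithSplitArrays(comm,2,2,4,4,i1,j1,a1,oi1,oj1,oa1,mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif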

/*
    Special version for direct calls from Fortran
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so can be used in void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
#undef SETERRQ2
#define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
#undef SETERRQ
#define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat = *mmat;
  PetscInt       m = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatPreallocated(mat);
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  }
#endif
  {
    PetscInt   i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscTruth roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat        A = aij->A;
    Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa = a->a;
    PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
    Mat        B = aij->B;
    Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          if (in[j] >= cstart && in[j] < cend){
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined (PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                ba    = b->a;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          }
        }
      } else {
        if (!aij->donotstash) {
          if (roworiented) {
            if (ignorezeroentries && v[i*n] == 0.0) continue;
            ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
          } else {
            if (ignorezeroentries && v[i] == 0.0) continue;
            ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
EXTERN_C_END