1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 4 #include "src/inline/spops.h" 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "MatDistribute_MPIAIJ" 8 /* 9 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 10 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 11 12 Only for square matrices 13 */ 14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 15 { 16 PetscMPIInt rank,size; 17 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 18 PetscErrorCode ierr; 19 Mat mat; 20 Mat_SeqAIJ *gmata; 21 PetscMPIInt tag; 22 MPI_Status status; 23 PetscTruth aij; 24 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 25 26 PetscFunctionBegin; 27 CHKMEMQ; 28 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 29 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 30 if (!rank) { 31 ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 32 if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 33 } 34 if (reuse == MAT_INITIAL_MATRIX) { 35 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 36 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 37 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 38 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 39 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 40 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 41 rowners[0] = 0; 42 for (i=2; i<=size; i++) { 43 rowners[i] += rowners[i-1]; 44 } 45 rstart = rowners[rank]; 46 rend = rowners[rank+1]; 47 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 48 if (!rank) { 49 gmata = (Mat_SeqAIJ*) gmat->data; 50 /* send row lengths to all processors */ 51 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 52 for (i=1; i<size; i++) { 53 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 54 } 55 /* determine number diagonal and off-diagonal counts */ 56 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 57 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 58 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 59 jj = 0; 60 for (i=0; i<m; i++) { 61 for (j=0; j<dlens[i]; j++) { 62 if (gmata->j[jj] < rstart) ld[i]++; 63 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 64 jj++; 65 } 66 } 67 /* send column indices to other processes */ 68 for (i=1; i<size; i++) { 69 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 70 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 71 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 72 } 73 74 /* send numerical values to other processes */ 75 for (i=1; i<size; i++) { 76 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 77 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 78 } 79 gmataa = gmata->a; 80 gmataj = gmata->j; 81 82 } else { 83 /* receive row lengths */ 84 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 85 /* receive column indices */ 86 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 87 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 88 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 89 /* determine number diagonal and off-diagonal counts */ 90 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 91 ierr = 
PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 92 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 93 jj = 0; 94 for (i=0; i<m; i++) { 95 for (j=0; j<dlens[i]; j++) { 96 if (gmataj[jj] < rstart) ld[i]++; 97 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 98 jj++; 99 } 100 } 101 /* receive numerical values */ 102 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 104 } 105 /* set preallocation */ 106 for (i=0; i<m; i++) { 107 dlens[i] -= olens[i]; 108 } 109 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 110 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 111 112 for (i=0; i<m; i++) { 113 dlens[i] += olens[i]; 114 } 115 cnt = 0; 116 for (i=0; i<m; i++) { 117 row = rstart + i; 118 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 119 cnt += dlens[i]; 120 } 121 if (rank) { 122 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 123 } 124 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 125 ierr = PetscFree(rowners);CHKERRQ(ierr); 126 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 127 *inmat = mat; 128 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 129 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 130 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 131 mat = *inmat; 132 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 133 if (!rank) { 134 /* send numerical values to other processes */ 135 gmata = (Mat_SeqAIJ*) gmat->data; 136 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 137 gmataa = gmata->a; 138 for (i=1; i<size; i++) { 139 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 140 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 141 } 142 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 143 } else { 144 /* receive numerical values from process 0*/ 145 nz = Ad->nz + Ao->nz; 146 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 147 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 148 } 149 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 150 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 151 ad = Ad->a; 152 ao = Ao->a; 153 if (mat->rmap->n) { 154 i = 0; 155 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 156 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 157 } 158 for (i=1; i<mat->rmap->n; i++) { 159 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 160 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 161 } 162 i--; 163 if (mat->rmap->n) { 164 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 165 } 166 if (rank) { 167 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 168 } 169 } 170 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 171 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 172 CHKMEMQ; 173 PetscFunctionReturn(0); 174 } 175 176 /* 177 Local utility routine that creates a mapping from the global column 178 number to the local number in the off-diagonal part of 
the local storage of the matrix. When PETSC_USE_CTABLE is defined this is scalable,
   at a slightly higher hash-table cost; without it, it is not scalable (each processor
   stores an order N integer array) but access is fast.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
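  /*
     v[] must contain new numerical values for every stored nonzero of the (global) row,
     ordered by increasing global column index: first the off-diagonal entries to the
     left of this process's diagonal block, then the diagonal-block entries, then the
     remaining off-diagonal entries to the right.
  */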
Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 280 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 281 PetscErrorCode ierr; 282 PetscInt l,*garray = mat->garray,diag; 283 284 PetscFunctionBegin; 285 /* code only works for square matrices A */ 286 287 /* find size of row to the left of the diagonal part */ 288 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 289 row = row - diag; 290 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 291 if (garray[b->j[b->i[row]+l]] > diag) break; 292 } 293 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 294 295 /* diagonal part */ 296 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 297 298 /* right of diagonal part */ 299 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 300 PetscFunctionReturn(0); 301 } 302 303 #undef __FUNCT__ 304 #define __FUNCT__ "MatSetValues_MPIAIJ" 305 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 306 { 307 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 308 PetscScalar value; 309 PetscErrorCode ierr; 310 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 311 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 312 PetscTruth roworiented = aij->roworiented; 313 314 /* Some Variables required in the macro */ 315 Mat A = aij->A; 316 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 317 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 318 MatScalar *aa = a->a; 319 PetscTruth ignorezeroentries = a->ignorezeroentries; 320 Mat B = aij->B; 321 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 322 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 323 MatScalar *ba = b->a; 324 325 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 326 PetscInt nonew = a->nonew; 327 MatScalar *ap1,*ap2; 328 329 PetscFunctionBegin; 330 for (i=0; i<m; i++) { 331 if (im[i] < 0) continue; 332 #if defined(PETSC_USE_DEBUG) 333 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 334 #endif 335 if (im[i] >= rstart && im[i] < rend) { 336 row = im[i] - rstart; 337 lastcol1 = -1; 338 rp1 = aj + ai[row]; 339 ap1 = aa + ai[row]; 340 rmax1 = aimax[row]; 341 nrow1 = ailen[row]; 342 low1 = 0; 343 high1 = nrow1; 344 lastcol2 = -1; 345 rp2 = bj + bi[row]; 346 ap2 = ba + bi[row]; 347 rmax2 = bimax[row]; 348 nrow2 = bilen[row]; 349 low2 = 0; 350 high2 = nrow2; 351 352 for (j=0; j<n; j++) { 353 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 354 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 355 if (in[j] >= cstart && in[j] < cend){ 356 col = in[j] - cstart; 357 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 358 } else if (in[j] < 0) continue; 359 #if defined(PETSC_USE_DEBUG) 360 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 361 #endif 362 else { 363 if (mat->was_assembled) { 364 if (!aij->colmap) { 365 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 366 } 367 #if defined (PETSC_USE_CTABLE) 368 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 369 col--; 370 #else 371 col = aij->colmap[in[j]] - 1; 372 #endif 373 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 374 ierr = 
DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 375 col = in[j]; 376 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 377 B = aij->B; 378 b = (Mat_SeqAIJ*)B->data; 379 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 380 rp2 = bj + bi[row]; 381 ap2 = ba + bi[row]; 382 rmax2 = bimax[row]; 383 nrow2 = bilen[row]; 384 low2 = 0; 385 high2 = nrow2; 386 bm = aij->B->rmap->n; 387 ba = b->a; 388 } 389 } else col = in[j]; 390 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 391 } 392 } 393 } else { 394 if (!aij->donotstash) { 395 if (roworiented) { 396 if (ignorezeroentries && v[i*n] == 0.0) continue; 397 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 398 } else { 399 if (ignorezeroentries && v[i] == 0.0) continue; 400 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 401 } 402 } 403 } 404 } 405 PetscFunctionReturn(0); 406 } 407 408 #undef __FUNCT__ 409 #define __FUNCT__ "MatGetValues_MPIAIJ" 410 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 411 { 412 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 413 PetscErrorCode ierr; 414 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 415 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 416 417 PetscFunctionBegin; 418 for (i=0; i<m; i++) { 419 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 420 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 421 if (idxm[i] >= rstart && idxm[i] < rend) { 422 row = idxm[i] - rstart; 423 for (j=0; j<n; j++) { 424 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 425 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 426 if (idxn[j] >= cstart && idxn[j] < cend){ 427 col = idxn[j] - cstart; 428 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 429 } else { 430 if (!aij->colmap) { 431 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 432 } 433 #if defined (PETSC_USE_CTABLE) 434 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 435 col --; 436 #else 437 col = aij->colmap[idxn[j]] - 1; 438 #endif 439 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 440 else { 441 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 442 } 443 } 444 } 445 } else { 446 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 447 } 448 } 449 PetscFunctionReturn(0); 450 } 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 455 { 456 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 457 PetscErrorCode ierr; 458 PetscInt nstash,reallocs; 459 InsertMode addv; 460 461 PetscFunctionBegin; 462 if (aij->donotstash) { 463 PetscFunctionReturn(0); 464 } 465 466 /* make sure all processors are either in INSERTMODE or ADDMODE */ 467 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 468 if (addv == (ADD_VALUES|INSERT_VALUES)) { 469 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 470 } 471 mat->insertmode = addv; /* in case this processor had no cache */ 472 473 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 474 ierr = 
MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 475 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 476 PetscFunctionReturn(0); 477 } 478 479 #undef __FUNCT__ 480 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 481 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 482 { 483 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 484 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 485 PetscErrorCode ierr; 486 PetscMPIInt n; 487 PetscInt i,j,rstart,ncols,flg; 488 PetscInt *row,*col; 489 PetscTruth other_disassembled; 490 PetscScalar *val; 491 InsertMode addv = mat->insertmode; 492 493 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 494 PetscFunctionBegin; 495 if (!aij->donotstash) { 496 while (1) { 497 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 498 if (!flg) break; 499 500 for (i=0; i<n;) { 501 /* Now identify the consecutive vals belonging to the same row */ 502 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 503 if (j < n) ncols = j-i; 504 else ncols = n-i; 505 /* Now assemble all these values with a single function call */ 506 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 507 i = j; 508 } 509 } 510 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 511 } 512 a->compressedrow.use = PETSC_FALSE; 513 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 514 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 515 516 /* determine if any processor has disassembled, if so we must 517 also disassemble ourselfs, in order that we may reassemble. */ 518 /* 519 if nonzero structure of submatrix B cannot change then we know that 520 no processor disassembled thus we can skip this stuff 521 */ 522 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 523 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 524 if (mat->was_assembled && !other_disassembled) { 525 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 526 } 527 } 528 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 529 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 530 } 531 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 532 ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 533 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 534 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 535 536 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 537 aij->rowvalues = 0; 538 539 /* used by MatAXPY() */ 540 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 541 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 542 543 PetscFunctionReturn(0); 544 } 545 546 #undef __FUNCT__ 547 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 548 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 549 { 550 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 551 PetscErrorCode ierr; 552 553 PetscFunctionBegin; 554 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 555 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 556 PetscFunctionReturn(0); 557 } 558 559 #undef __FUNCT__ 560 #define __FUNCT__ "MatZeroRows_MPIAIJ" 561 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 562 { 563 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 564 PetscErrorCode ierr; 565 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 566 PetscInt i,*owners = A->rmap->range; 567 PetscInt 
*nprocs,j,idx,nsends,row; 568 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 569 PetscInt *rvalues,count,base,slen,*source; 570 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 571 MPI_Comm comm = ((PetscObject)A)->comm; 572 MPI_Request *send_waits,*recv_waits; 573 MPI_Status recv_status,*send_status; 574 #if defined(PETSC_DEBUG) 575 PetscTruth found = PETSC_FALSE; 576 #endif 577 578 PetscFunctionBegin; 579 /* first count number of contributors to each processor */ 580 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 581 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 582 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 583 j = 0; 584 for (i=0; i<N; i++) { 585 if (lastidx > (idx = rows[i])) j = 0; 586 lastidx = idx; 587 for (; j<size; j++) { 588 if (idx >= owners[j] && idx < owners[j+1]) { 589 nprocs[2*j]++; 590 nprocs[2*j+1] = 1; 591 owner[i] = j; 592 #if defined(PETSC_DEBUG) 593 found = PETSC_TRUE; 594 #endif 595 break; 596 } 597 } 598 #if defined(PETSC_DEBUG) 599 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 600 found = PETSC_FALSE; 601 #endif 602 } 603 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 604 605 /* inform other processors of number of messages and max length*/ 606 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 607 608 /* post receives: */ 609 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 610 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 611 for (i=0; i<nrecvs; i++) { 612 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 613 } 614 615 /* do sends: 616 1) starts[i] gives the starting index in svalues for stuff going to 617 the ith processor 618 */ 619 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 620 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 621 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 622 starts[0] = 0; 623 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 624 for (i=0; i<N; i++) { 625 svalues[starts[owner[i]]++] = rows[i]; 626 } 627 628 starts[0] = 0; 629 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 630 count = 0; 631 for (i=0; i<size; i++) { 632 if (nprocs[2*i+1]) { 633 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 634 } 635 } 636 ierr = PetscFree(starts);CHKERRQ(ierr); 637 638 base = owners[rank]; 639 640 /* wait on receives */ 641 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 642 source = lens + nrecvs; 643 count = nrecvs; slen = 0; 644 while (count) { 645 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 646 /* unpack receives into our local space */ 647 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 648 source[imdex] = recv_status.MPI_SOURCE; 649 lens[imdex] = n; 650 slen += n; 651 count--; 652 } 653 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 654 655 /* move the data into the send scatter */ 656 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 657 count = 0; 658 for (i=0; i<nrecvs; i++) { 659 values = rvalues + i*nmax; 660 for (j=0; j<lens[i]; j++) { 661 lrows[count++] = values[j] - base; 662 } 663 } 664 ierr = PetscFree(rvalues);CHKERRQ(ierr); 665 ierr = PetscFree(lens);CHKERRQ(ierr); 666 ierr = PetscFree(owner);CHKERRQ(ierr); 667 ierr = PetscFree(nprocs);CHKERRQ(ierr); 668 
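  /* At this point lrows[0..slen-1] holds, in local numbering (global row minus the
     ownership start), every locally owned row that some process asked to have zeroed;
     the sends posted above are waited on after the local work below. */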
669 /* actually zap the local rows */ 670 /* 671 Zero the required rows. If the "diagonal block" of the matrix 672 is square and the user wishes to set the diagonal we use separate 673 code so that MatSetValues() is not called for each diagonal allocating 674 new memory, thus calling lots of mallocs and slowing things down. 675 676 Contributed by: Matthew Knepley 677 */ 678 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 679 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 680 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 681 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 682 } else if (diag != 0.0) { 683 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 684 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 685 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 686 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 687 } 688 for (i = 0; i < slen; i++) { 689 row = lrows[i] + rstart; 690 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 691 } 692 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 693 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 694 } else { 695 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 696 } 697 ierr = PetscFree(lrows);CHKERRQ(ierr); 698 699 /* wait on sends */ 700 if (nsends) { 701 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 702 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 703 ierr = PetscFree(send_status);CHKERRQ(ierr); 704 } 705 ierr = PetscFree(send_waits);CHKERRQ(ierr); 706 ierr = PetscFree(svalues);CHKERRQ(ierr); 707 708 PetscFunctionReturn(0); 709 } 710 711 #undef __FUNCT__ 712 #define __FUNCT__ "MatMult_MPIAIJ" 713 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 714 { 715 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 716 PetscErrorCode ierr; 717 PetscInt nt; 718 719 PetscFunctionBegin; 720 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 721 if (nt != A->cmap->n) { 722 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 723 } 724 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 725 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 726 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 727 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 728 PetscFunctionReturn(0); 729 } 730 731 #undef __FUNCT__ 732 #define __FUNCT__ "MatMultAdd_MPIAIJ" 733 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 734 { 735 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 736 PetscErrorCode ierr; 737 738 PetscFunctionBegin; 739 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 740 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 741 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 742 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 743 PetscFunctionReturn(0); 744 } 745 746 #undef __FUNCT__ 747 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 748 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 749 { 750 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 751 PetscErrorCode ierr; 752 PetscTruth merged; 753 754 PetscFunctionBegin; 755 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 756 /* do nondiagonal part */ 757 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 758 if (!merged) { 759 /* send it on 
its way */ 760 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 761 /* do local part */ 762 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 763 /* receive remote parts: note this assumes the values are not actually */ 764 /* added in yy until the next line, */ 765 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 766 } else { 767 /* do local part */ 768 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 769 /* send it on its way */ 770 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 771 /* values actually were received in the Begin() but we need to call this nop */ 772 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 773 } 774 PetscFunctionReturn(0); 775 } 776 777 EXTERN_C_BEGIN 778 #undef __FUNCT__ 779 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 780 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 781 { 782 MPI_Comm comm; 783 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 784 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 785 IS Me,Notme; 786 PetscErrorCode ierr; 787 PetscInt M,N,first,last,*notme,i; 788 PetscMPIInt size; 789 790 PetscFunctionBegin; 791 792 /* Easy test: symmetric diagonal block */ 793 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 794 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 795 if (!*f) PetscFunctionReturn(0); 796 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 797 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 798 if (size == 1) PetscFunctionReturn(0); 799 800 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 801 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 802 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 803 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 804 for (i=0; i<first; i++) notme[i] = i; 805 for (i=last; i<M; i++) notme[i-last+first] = i; 806 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 807 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 808 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 809 Aoff = Aoffs[0]; 810 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 811 Boff = Boffs[0]; 812 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 813 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 814 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 815 ierr = ISDestroy(Me);CHKERRQ(ierr); 816 ierr = ISDestroy(Notme);CHKERRQ(ierr); 817 818 PetscFunctionReturn(0); 819 } 820 EXTERN_C_END 821 822 #undef __FUNCT__ 823 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 824 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 825 { 826 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 827 PetscErrorCode ierr; 828 829 PetscFunctionBegin; 830 /* do nondiagonal part */ 831 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 832 /* send it on its way */ 833 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 834 /* do local part */ 835 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 836 /* receive remote parts */ 837 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 838 PetscFunctionReturn(0); 839 } 840 841 /* 842 This only works correctly for square matrices where the subblock A->A is the 843 diagonal block 844 */ 845 #undef 
__FUNCT__ 846 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 847 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 848 { 849 PetscErrorCode ierr; 850 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 851 852 PetscFunctionBegin; 853 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 854 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) { 855 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 856 } 857 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 858 PetscFunctionReturn(0); 859 } 860 861 #undef __FUNCT__ 862 #define __FUNCT__ "MatScale_MPIAIJ" 863 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 864 { 865 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 866 PetscErrorCode ierr; 867 868 PetscFunctionBegin; 869 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 870 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 871 PetscFunctionReturn(0); 872 } 873 874 #undef __FUNCT__ 875 #define __FUNCT__ "MatDestroy_MPIAIJ" 876 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 877 { 878 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 879 PetscErrorCode ierr; 880 881 PetscFunctionBegin; 882 #if defined(PETSC_USE_LOG) 883 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 884 #endif 885 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 886 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 887 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 888 #if defined (PETSC_USE_CTABLE) 889 if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);} 890 #else 891 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 892 #endif 893 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 894 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 895 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 896 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 897 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 898 ierr = PetscFree(aij);CHKERRQ(ierr); 899 900 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 901 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 902 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 903 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 904 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 905 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 906 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 907 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 908 PetscFunctionReturn(0); 909 } 910 911 #undef __FUNCT__ 912 #define __FUNCT__ "MatView_MPIAIJ_Binary" 913 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 914 { 915 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 916 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 917 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 918 PetscErrorCode ierr; 919 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 920 int fd; 921 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 922 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 923 PetscScalar *column_values; 924 925 PetscFunctionBegin; 926 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 927 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 928 nz = 
A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; 
j<B->i[i+1]; j++) { 1014 column_values[cnt++] = B->a[j]; 1015 } 1016 } 1017 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1018 1019 /* store the column values to the file */ 1020 if (!rank) { 1021 MPI_Status status; 1022 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1023 for (i=1; i<size; i++) { 1024 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1025 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1026 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1027 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1028 } 1029 } else { 1030 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1031 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1032 } 1033 ierr = PetscFree(column_values);CHKERRQ(ierr); 1034 PetscFunctionReturn(0); 1035 } 1036 1037 #undef __FUNCT__ 1038 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1039 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1040 { 1041 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1042 PetscErrorCode ierr; 1043 PetscMPIInt rank = aij->rank,size = aij->size; 1044 PetscTruth isdraw,iascii,isbinary; 1045 PetscViewer sviewer; 1046 PetscViewerFormat format; 1047 1048 PetscFunctionBegin; 1049 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1050 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1051 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1052 if (iascii) { 1053 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1054 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1055 MatInfo info; 1056 PetscTruth inodes; 1057 1058 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1059 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1060 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1061 if (!inodes) { 1062 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1063 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1064 } else { 1065 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1066 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1067 } 1068 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1069 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1070 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1071 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1072 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1073 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1074 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1075 PetscFunctionReturn(0); 1076 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1077 PetscInt inodecount,inodelimit,*inodes; 1078 ierr = 
MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    if (mat->rmap->N > 1024) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead");

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to participate in the viewer calls below so the matrix gets drawn, since the
       graphics waits are synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = 
PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1159 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1160 } 1161 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1162 ierr = MatDestroy(A);CHKERRQ(ierr); 1163 } 1164 PetscFunctionReturn(0); 1165 } 1166 1167 #undef __FUNCT__ 1168 #define __FUNCT__ "MatView_MPIAIJ" 1169 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1170 { 1171 PetscErrorCode ierr; 1172 PetscTruth iascii,isdraw,issocket,isbinary; 1173 1174 PetscFunctionBegin; 1175 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1176 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1177 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1178 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1179 if (iascii || isdraw || isbinary || issocket) { 1180 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1181 } else { 1182 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1183 } 1184 PetscFunctionReturn(0); 1185 } 1186 1187 #undef __FUNCT__ 1188 #define __FUNCT__ "MatRelax_MPIAIJ" 1189 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1190 { 1191 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1192 PetscErrorCode ierr; 1193 Vec bb1; 1194 1195 PetscFunctionBegin; 1196 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1197 1198 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1199 if (flag & SOR_ZERO_INITIAL_GUESS) { 1200 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1201 its--; 1202 } 1203 1204 while (its--) { 1205 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1206 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1207 1208 /* update rhs: bb1 = bb - B*x */ 1209 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1210 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1211 1212 /* local sweep */ 1213 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 1214 } 1215 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1216 if (flag & SOR_ZERO_INITIAL_GUESS) { 1217 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1218 its--; 1219 } 1220 while (its--) { 1221 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1222 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1223 1224 /* update rhs: bb1 = bb - B*x */ 1225 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1226 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1227 1228 /* local sweep */ 1229 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1230 } 1231 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1232 if (flag & SOR_ZERO_INITIAL_GUESS) { 1233 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1234 its--; 1235 } 1236 while (its--) { 1237 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1238 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 
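      /* as in the symmetric and forward cases above, the freshly scattered ghost values in
         lvec are folded into the right hand side so that the purely local backward sweep on
         the diagonal block approximates a sweep of the global matrix */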
1239 1240 /* update rhs: bb1 = bb - B*x */ 1241 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1242 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1243 1244 /* local sweep */ 1245 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1246 } 1247 } else { 1248 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1249 } 1250 1251 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1252 PetscFunctionReturn(0); 1253 } 1254 1255 #undef __FUNCT__ 1256 #define __FUNCT__ "MatPermute_MPIAIJ" 1257 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1258 { 1259 MPI_Comm comm,pcomm; 1260 PetscInt first,local_size,nrows; 1261 const PetscInt *rows; 1262 int ntids; 1263 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1264 PetscErrorCode ierr; 1265 1266 PetscFunctionBegin; 1267 ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr); 1268 /* make a collective version of 'rowp' */ 1269 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr); 1270 if (pcomm==comm) { 1271 crowp = rowp; 1272 } else { 1273 ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr); 1274 ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr); 1275 ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr); 1276 ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr); 1277 } 1278 /* collect the global row permutation and invert it */ 1279 ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr); 1280 ierr = ISSetPermutation(growp); CHKERRQ(ierr); 1281 if (pcomm!=comm) { 1282 ierr = ISDestroy(crowp); CHKERRQ(ierr); 1283 } 1284 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1285 /* get the local target indices */ 1286 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr); 1287 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr); 1288 ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr); 1289 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr); 1290 ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr); 1291 ierr = ISDestroy(irowp); CHKERRQ(ierr); 1292 /* the column permutation is so much easier; 1293 make a local version of 'colp' and invert it */ 1294 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr); 1295 ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr); 1296 if (ntids==1) { 1297 lcolp = colp; 1298 } else { 1299 ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr); 1300 ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr); 1301 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr); 1302 } 1303 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr); 1304 ierr = ISSetPermutation(lcolp); CHKERRQ(ierr); 1305 if (ntids>1) { 1306 ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr); 1307 ierr = ISDestroy(lcolp); CHKERRQ(ierr); 1308 } 1309 /* now we just get the submatrix */ 1310 ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr); 1311 /* clean up */ 1312 ierr = ISDestroy(lrowp); CHKERRQ(ierr); 1313 ierr = ISDestroy(icolp); CHKERRQ(ierr); 1314 PetscFunctionReturn(0); 1315 } 1316 1317 #undef __FUNCT__ 1318 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1319 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1320 { 1321 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1322 Mat A = mat->A,B = mat->B; 1323 PetscErrorCode ierr; 1324 PetscReal isend[5],irecv[5]; 1325 1326 PetscFunctionBegin; 1327 info->block_size = 1.0; 1328 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1329 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = 
info->nz_unneeded; 1330 isend[3] = info->memory; isend[4] = info->mallocs; 1331 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1332 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1333 isend[3] += info->memory; isend[4] += info->mallocs; 1334 if (flag == MAT_LOCAL) { 1335 info->nz_used = isend[0]; 1336 info->nz_allocated = isend[1]; 1337 info->nz_unneeded = isend[2]; 1338 info->memory = isend[3]; 1339 info->mallocs = isend[4]; 1340 } else if (flag == MAT_GLOBAL_MAX) { 1341 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1342 info->nz_used = irecv[0]; 1343 info->nz_allocated = irecv[1]; 1344 info->nz_unneeded = irecv[2]; 1345 info->memory = irecv[3]; 1346 info->mallocs = irecv[4]; 1347 } else if (flag == MAT_GLOBAL_SUM) { 1348 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1349 info->nz_used = irecv[0]; 1350 info->nz_allocated = irecv[1]; 1351 info->nz_unneeded = irecv[2]; 1352 info->memory = irecv[3]; 1353 info->mallocs = irecv[4]; 1354 } 1355 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1356 info->fill_ratio_needed = 0; 1357 info->factor_mallocs = 0; 1358 info->rows_global = (double)matin->rmap->N; 1359 info->columns_global = (double)matin->cmap->N; 1360 info->rows_local = (double)matin->rmap->n; 1361 info->columns_local = (double)matin->cmap->N; 1362 1363 PetscFunctionReturn(0); 1364 } 1365 1366 #undef __FUNCT__ 1367 #define __FUNCT__ "MatSetOption_MPIAIJ" 1368 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg) 1369 { 1370 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1371 PetscErrorCode ierr; 1372 1373 PetscFunctionBegin; 1374 switch (op) { 1375 case MAT_NEW_NONZERO_LOCATIONS: 1376 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1377 case MAT_KEEP_ZEROED_ROWS: 1378 case MAT_NEW_NONZERO_LOCATION_ERR: 1379 case MAT_USE_INODES: 1380 case MAT_IGNORE_ZERO_ENTRIES: 1381 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1382 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1383 break; 1384 case MAT_ROW_ORIENTED: 1385 a->roworiented = flg; 1386 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1387 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1388 break; 1389 case MAT_NEW_DIAGONALS: 1390 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1391 break; 1392 case MAT_IGNORE_OFF_PROC_ENTRIES: 1393 a->donotstash = PETSC_TRUE; 1394 break; 1395 case MAT_SYMMETRIC: 1396 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1397 break; 1398 case MAT_STRUCTURALLY_SYMMETRIC: 1399 case MAT_HERMITIAN: 1400 case MAT_SYMMETRY_ETERNAL: 1401 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1402 break; 1403 default: 1404 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1405 } 1406 PetscFunctionReturn(0); 1407 } 1408 1409 #undef __FUNCT__ 1410 #define __FUNCT__ "MatGetRow_MPIAIJ" 1411 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1412 { 1413 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1414 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1415 PetscErrorCode ierr; 1416 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1417 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1418 PetscInt *cmap,*idx_p; 1419 1420 PetscFunctionBegin; 1421 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1422 mat->getrowactive = PETSC_TRUE; 1423 1424 if (!mat->rowvalues && (idx || v)) { 1425 /* 1426 allocate enough space to hold information 
from the longest row. 1427 */ 1428 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1429 PetscInt max = 1,tmp; 1430 for (i=0; i<matin->rmap->n; i++) { 1431 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1432 if (max < tmp) { max = tmp; } 1433 } 1434 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1435 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1436 } 1437 1438 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1439 lrow = row - rstart; 1440 1441 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1442 if (!v) {pvA = 0; pvB = 0;} 1443 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1444 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1445 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1446 nztot = nzA + nzB; 1447 1448 cmap = mat->garray; 1449 if (v || idx) { 1450 if (nztot) { 1451 /* Sort by increasing column numbers, assuming A and B already sorted */ 1452 PetscInt imark = -1; 1453 if (v) { 1454 *v = v_p = mat->rowvalues; 1455 for (i=0; i<nzB; i++) { 1456 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1457 else break; 1458 } 1459 imark = i; 1460 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1461 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1462 } 1463 if (idx) { 1464 *idx = idx_p = mat->rowindices; 1465 if (imark > -1) { 1466 for (i=0; i<imark; i++) { 1467 idx_p[i] = cmap[cworkB[i]]; 1468 } 1469 } else { 1470 for (i=0; i<nzB; i++) { 1471 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1472 else break; 1473 } 1474 imark = i; 1475 } 1476 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1477 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1478 } 1479 } else { 1480 if (idx) *idx = 0; 1481 if (v) *v = 0; 1482 } 1483 } 1484 *nz = nztot; 1485 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1486 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1487 PetscFunctionReturn(0); 1488 } 1489 1490 #undef __FUNCT__ 1491 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1492 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1493 { 1494 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1495 1496 PetscFunctionBegin; 1497 if (!aij->getrowactive) { 1498 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1499 } 1500 aij->getrowactive = PETSC_FALSE; 1501 PetscFunctionReturn(0); 1502 } 1503 1504 #undef __FUNCT__ 1505 #define __FUNCT__ "MatNorm_MPIAIJ" 1506 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1507 { 1508 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1509 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1510 PetscErrorCode ierr; 1511 PetscInt i,j,cstart = mat->cmap->rstart; 1512 PetscReal sum = 0.0; 1513 MatScalar *v; 1514 1515 PetscFunctionBegin; 1516 if (aij->size == 1) { 1517 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1518 } else { 1519 if (type == NORM_FROBENIUS) { 1520 v = amat->a; 1521 for (i=0; i<amat->nz; i++) { 1522 #if defined(PETSC_USE_COMPLEX) 1523 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1524 #else 1525 sum += (*v)*(*v); v++; 1526 #endif 1527 } 1528 v = bmat->a; 1529 for (i=0; i<bmat->nz; i++) { 1530 #if defined(PETSC_USE_COMPLEX) 1531 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1532 #else 1533 sum += (*v)*(*v); v++; 1534 #endif 1535 } 1536 ierr = 
MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1537 *norm = sqrt(*norm); 1538 } else if (type == NORM_1) { /* max column norm */ 1539 PetscReal *tmp,*tmp2; 1540 PetscInt *jj,*garray = aij->garray; 1541 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1542 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1543 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1544 *norm = 0.0; 1545 v = amat->a; jj = amat->j; 1546 for (j=0; j<amat->nz; j++) { 1547 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1548 } 1549 v = bmat->a; jj = bmat->j; 1550 for (j=0; j<bmat->nz; j++) { 1551 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1552 } 1553 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1554 for (j=0; j<mat->cmap->N; j++) { 1555 if (tmp2[j] > *norm) *norm = tmp2[j]; 1556 } 1557 ierr = PetscFree(tmp);CHKERRQ(ierr); 1558 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1559 } else if (type == NORM_INFINITY) { /* max row norm */ 1560 PetscReal ntemp = 0.0; 1561 for (j=0; j<aij->A->rmap->n; j++) { 1562 v = amat->a + amat->i[j]; 1563 sum = 0.0; 1564 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1565 sum += PetscAbsScalar(*v); v++; 1566 } 1567 v = bmat->a + bmat->i[j]; 1568 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1569 sum += PetscAbsScalar(*v); v++; 1570 } 1571 if (sum > ntemp) ntemp = sum; 1572 } 1573 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1574 } else { 1575 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1576 } 1577 } 1578 PetscFunctionReturn(0); 1579 } 1580 1581 #undef __FUNCT__ 1582 #define __FUNCT__ "MatTranspose_MPIAIJ" 1583 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1584 { 1585 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1586 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1587 PetscErrorCode ierr; 1588 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1589 PetscInt cstart=A->cmap->rstart,ncol; 1590 Mat B; 1591 MatScalar *array; 1592 1593 PetscFunctionBegin; 1594 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1595 1596 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1597 ai = Aloc->i; aj = Aloc->j; 1598 bi = Bloc->i; bj = Bloc->j; 1599 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1600 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1601 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1602 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1603 for (i=0; i<ai[ma]; i++){ 1604 d_nnz[aj[i]] ++; 1605 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1606 } 1607 1608 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1609 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1610 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1611 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1612 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1613 } else { 1614 B = *matout; 1615 } 1616 1617 /* copy over the A part */ 1618 array = Aloc->a; 1619 row = A->rmap->rstart; 1620 for (i=0; i<ma; i++) { 1621 ncol = ai[i+1]-ai[i]; 1622 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1623 row++; array += ncol; aj += ncol; 1624 } 1625 aj = Aloc->j; 1626 for (i=0; i<ai[ma]; i++) aj[i] -= 
cstart; /* resume local col index */ 1627 1628 /* copy over the B part */ 1629 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1630 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1631 array = Bloc->a; 1632 row = A->rmap->rstart; 1633 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1634 cols_tmp = cols; 1635 for (i=0; i<mb; i++) { 1636 ncol = bi[i+1]-bi[i]; 1637 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1638 row++; array += ncol; cols_tmp += ncol; 1639 } 1640 ierr = PetscFree(cols);CHKERRQ(ierr); 1641 1642 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1643 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1644 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1645 *matout = B; 1646 } else { 1647 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1648 } 1649 PetscFunctionReturn(0); 1650 } 1651 1652 #undef __FUNCT__ 1653 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1654 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1655 { 1656 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1657 Mat a = aij->A,b = aij->B; 1658 PetscErrorCode ierr; 1659 PetscInt s1,s2,s3; 1660 1661 PetscFunctionBegin; 1662 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1663 if (rr) { 1664 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1665 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1666 /* Overlap communication with computation. */ 1667 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1668 } 1669 if (ll) { 1670 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1671 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1672 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1673 } 1674 /* scale the diagonal block */ 1675 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1676 1677 if (rr) { 1678 /* Do a scatter end and then right scale the off-diagonal block */ 1679 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1680 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1681 } 1682 1683 PetscFunctionReturn(0); 1684 } 1685 1686 #undef __FUNCT__ 1687 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1688 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1689 { 1690 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1691 PetscErrorCode ierr; 1692 1693 PetscFunctionBegin; 1694 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1695 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1696 PetscFunctionReturn(0); 1697 } 1698 #undef __FUNCT__ 1699 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1700 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1701 { 1702 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1703 PetscErrorCode ierr; 1704 1705 PetscFunctionBegin; 1706 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1707 PetscFunctionReturn(0); 1708 } 1709 1710 #undef __FUNCT__ 1711 #define __FUNCT__ "MatEqual_MPIAIJ" 1712 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1713 { 1714 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1715 Mat a,b,c,d; 1716 PetscTruth flg; 1717 PetscErrorCode ierr; 1718 1719 PetscFunctionBegin; 1720 a = matA->A; b = matA->B; 1721 c = matB->A; d = matB->B; 1722 1723 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1724 if (flg) { 1725 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1726 } 1727 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1728 PetscFunctionReturn(0); 1729 } 1730 1731 #undef __FUNCT__ 1732 #define __FUNCT__ 
"MatCopy_MPIAIJ" 1733 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1734 { 1735 PetscErrorCode ierr; 1736 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1737 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1738 1739 PetscFunctionBegin; 1740 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1741 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1742 /* because of the column compression in the off-processor part of the matrix a->B, 1743 the number of columns in a->B and b->B may be different, hence we cannot call 1744 the MatCopy() directly on the two parts. If need be, we can provide a more 1745 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1746 then copying the submatrices */ 1747 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1748 } else { 1749 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1750 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1751 } 1752 PetscFunctionReturn(0); 1753 } 1754 1755 #undef __FUNCT__ 1756 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1757 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1758 { 1759 PetscErrorCode ierr; 1760 1761 PetscFunctionBegin; 1762 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1763 PetscFunctionReturn(0); 1764 } 1765 1766 #include "petscblaslapack.h" 1767 #undef __FUNCT__ 1768 #define __FUNCT__ "MatAXPY_MPIAIJ" 1769 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1770 { 1771 PetscErrorCode ierr; 1772 PetscInt i; 1773 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1774 PetscBLASInt bnz,one=1; 1775 Mat_SeqAIJ *x,*y; 1776 1777 PetscFunctionBegin; 1778 if (str == SAME_NONZERO_PATTERN) { 1779 PetscScalar alpha = a; 1780 x = (Mat_SeqAIJ *)xx->A->data; 1781 y = (Mat_SeqAIJ *)yy->A->data; 1782 bnz = PetscBLASIntCast(x->nz); 1783 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1784 x = (Mat_SeqAIJ *)xx->B->data; 1785 y = (Mat_SeqAIJ *)yy->B->data; 1786 bnz = PetscBLASIntCast(x->nz); 1787 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1788 } else if (str == SUBSET_NONZERO_PATTERN) { 1789 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1790 1791 x = (Mat_SeqAIJ *)xx->B->data; 1792 y = (Mat_SeqAIJ *)yy->B->data; 1793 if (y->xtoy && y->XtoY != xx->B) { 1794 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1795 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1796 } 1797 if (!y->xtoy) { /* get xtoy */ 1798 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1799 y->XtoY = xx->B; 1800 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1801 } 1802 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1803 } else { 1804 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1805 } 1806 PetscFunctionReturn(0); 1807 } 1808 1809 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1810 1811 #undef __FUNCT__ 1812 #define __FUNCT__ "MatConjugate_MPIAIJ" 1813 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1814 { 1815 #if defined(PETSC_USE_COMPLEX) 1816 PetscErrorCode ierr; 1817 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1818 1819 PetscFunctionBegin; 1820 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1821 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1822 #else 1823 PetscFunctionBegin; 1824 #endif 1825 PetscFunctionReturn(0); 1826 } 1827 1828 #undef __FUNCT__ 1829 #define __FUNCT__ "MatRealPart_MPIAIJ" 1830 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1831 { 1832 Mat_MPIAIJ *a = 
(Mat_MPIAIJ*)A->data; 1833 PetscErrorCode ierr; 1834 1835 PetscFunctionBegin; 1836 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1837 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1838 PetscFunctionReturn(0); 1839 } 1840 1841 #undef __FUNCT__ 1842 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1843 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1844 { 1845 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1846 PetscErrorCode ierr; 1847 1848 PetscFunctionBegin; 1849 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1850 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1851 PetscFunctionReturn(0); 1852 } 1853 1854 #ifdef PETSC_HAVE_PBGL 1855 1856 #include <boost/parallel/mpi/bsp_process_group.hpp> 1857 #include <boost/graph/distributed/ilu_default_graph.hpp> 1858 #include <boost/graph/distributed/ilu_0_block.hpp> 1859 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1860 #include <boost/graph/distributed/petsc/interface.hpp> 1861 #include <boost/multi_array.hpp> 1862 #include <boost/parallel/distributed_property_map.hpp> 1863 1864 #undef __FUNCT__ 1865 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1866 /* 1867 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1868 */ 1869 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1870 { 1871 namespace petsc = boost::distributed::petsc; 1872 1873 namespace graph_dist = boost::graph::distributed; 1874 using boost::graph::distributed::ilu_default::process_group_type; 1875 using boost::graph::ilu_permuted; 1876 1877 PetscTruth row_identity, col_identity; 1878 PetscContainer c; 1879 PetscInt m, n, M, N; 1880 PetscErrorCode ierr; 1881 1882 PetscFunctionBegin; 1883 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1884 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1885 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1886 if (!row_identity || !col_identity) { 1887 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1888 } 1889 1890 process_group_type pg; 1891 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1892 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1893 lgraph_type& level_graph = *lgraph_p; 1894 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1895 1896 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1897 ilu_permuted(level_graph); 1898 1899 /* put together the new matrix */ 1900 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 1901 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1902 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1903 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 1904 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 1905 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1906 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1907 1908 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr); 1909 ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr); 1910 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr); 1911 PetscFunctionReturn(0); 1912 } 1913 1914 #undef __FUNCT__ 1915 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1916 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 1917 { 1918 PetscFunctionBegin; 1919 PetscFunctionReturn(0); 1920 } 1921 1922 #undef __FUNCT__ 1923 #define __FUNCT__ "MatSolve_MPIAIJ" 1924 /* 1925 This uses the parallel ILU
factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1926 */ 1927 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1928 { 1929 namespace graph_dist = boost::graph::distributed; 1930 1931 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1932 lgraph_type* lgraph_p; 1933 PetscContainer c; 1934 PetscErrorCode ierr; 1935 1936 PetscFunctionBegin; 1937 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1938 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1939 ierr = VecCopy(b, x); CHKERRQ(ierr); 1940 1941 PetscScalar* array_x; 1942 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1943 PetscInt sx; 1944 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 1945 1946 PetscScalar* array_b; 1947 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 1948 PetscInt sb; 1949 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 1950 1951 lgraph_type& level_graph = *lgraph_p; 1952 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1953 1954 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 1955 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 1956 ref_x(array_x, boost::extents[num_vertices(graph)]); 1957 1958 typedef boost::iterator_property_map<array_ref_type::iterator, 1959 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 1960 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 1961 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 1962 1963 ilu_set_solve(*lgraph_p, vector_b, vector_x); 1964 1965 PetscFunctionReturn(0); 1966 } 1967 #endif 1968 1969 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 1970 PetscInt nzlocal,nsends,nrecvs; 1971 PetscMPIInt *send_rank; 1972 PetscInt *sbuf_nz,*sbuf_j,**rbuf_j; 1973 PetscScalar *sbuf_a,**rbuf_a; 1974 PetscErrorCode (*MatDestroy)(Mat); 1975 } Mat_Redundant; 1976 1977 #undef __FUNCT__ 1978 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 1979 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 1980 { 1981 PetscErrorCode ierr; 1982 Mat_Redundant *redund=(Mat_Redundant*)ptr; 1983 PetscInt i; 1984 1985 PetscFunctionBegin; 1986 ierr = PetscFree(redund->send_rank);CHKERRQ(ierr); 1987 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 1988 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 1989 for (i=0; i<redund->nrecvs; i++){ 1990 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 1991 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 1992 } 1993 ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 1994 ierr = PetscFree(redund);CHKERRQ(ierr); 1995 PetscFunctionReturn(0); 1996 } 1997 1998 #undef __FUNCT__ 1999 #define __FUNCT__ "MatDestroy_MatRedundant" 2000 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2001 { 2002 PetscErrorCode ierr; 2003 PetscContainer container; 2004 Mat_Redundant *redund=PETSC_NULL; 2005 2006 PetscFunctionBegin; 2007 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2008 if (container) { 2009 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2010 } else { 2011 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2012 } 2013 A->ops->destroy = redund->MatDestroy; 2014 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2015 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2016 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 2017 PetscFunctionReturn(0); 2018 } 2019 2020 #undef __FUNCT__ 2021 #define __FUNCT__ 
"MatGetRedundantMatrix_MPIAIJ" 2022 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2023 { 2024 PetscMPIInt rank,size; 2025 MPI_Comm comm=((PetscObject)mat)->comm; 2026 PetscErrorCode ierr; 2027 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2028 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2029 PetscInt *rowrange=mat->rmap->range; 2030 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2031 Mat A=aij->A,B=aij->B,C=*matredundant; 2032 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2033 PetscScalar *sbuf_a; 2034 PetscInt nzlocal=a->nz+b->nz; 2035 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2036 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2037 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2038 MatScalar *aworkA,*aworkB; 2039 PetscScalar *vals; 2040 PetscMPIInt tag1,tag2,tag3,imdex; 2041 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2042 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2043 MPI_Status recv_status,*send_status; 2044 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2045 PetscInt **rbuf_j=PETSC_NULL; 2046 PetscScalar **rbuf_a=PETSC_NULL; 2047 Mat_Redundant *redund=PETSC_NULL; 2048 PetscContainer container; 2049 2050 PetscFunctionBegin; 2051 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2052 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2053 2054 if (reuse == MAT_REUSE_MATRIX) { 2055 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2056 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2057 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2058 if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2059 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2060 if (container) { 2061 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2062 } else { 2063 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2064 } 2065 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2066 2067 nsends = redund->nsends; 2068 nrecvs = redund->nrecvs; 2069 send_rank = redund->send_rank; recv_rank = send_rank + size; 2070 sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends; 2071 sbuf_j = redund->sbuf_j; 2072 sbuf_a = redund->sbuf_a; 2073 rbuf_j = redund->rbuf_j; 2074 rbuf_a = redund->rbuf_a; 2075 } 2076 2077 if (reuse == MAT_INITIAL_MATRIX){ 2078 PetscMPIInt subrank,subsize; 2079 PetscInt nleftover,np_subcomm; 2080 /* get the destination processors' id send_rank, nsends and nrecvs */ 2081 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2082 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2083 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 2084 recv_rank = send_rank + size; 2085 np_subcomm = size/nsubcomm; 2086 nleftover = size - nsubcomm*np_subcomm; 2087 nsends = 0; nrecvs = 0; 2088 for (i=0; i<size; i++){ /* i=rank*/ 2089 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2090 send_rank[nsends] = i; nsends++; 2091 recv_rank[nrecvs++] = i; 2092 } 2093 } 2094 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2095 i = size-nleftover-1; 2096 j = 0; 2097 while (j < nsubcomm - nleftover){ 2098 send_rank[nsends++] = i; 2099 i--; j++; 2100 } 2101 } 2102 2103 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2104 for (i=0; i<nleftover; i++){ 2105 recv_rank[nrecvs++] = size-nleftover+i; 2106 } 2107 } 2108 2109 /* allocate sbuf_j, sbuf_a */ 2110 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2111 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2112 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2113 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2114 2115 /* copy mat's local entries into the buffers */ 2116 if (reuse == MAT_INITIAL_MATRIX){ 2117 rownz_max = 0; 2118 rptr = sbuf_j; 2119 cols = sbuf_j + rend-rstart + 1; 2120 vals = sbuf_a; 2121 rptr[0] = 0; 2122 for (i=0; i<rend-rstart; i++){ 2123 row = i + rstart; 2124 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2125 ncols = nzA + nzB; 2126 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2127 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2128 /* load the column indices for this row into cols */ 2129 lwrite = 0; 2130 for (l=0; l<nzB; l++) { 2131 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2132 vals[lwrite] = aworkB[l]; 2133 cols[lwrite++] = ctmp; 2134 } 2135 } 2136 for (l=0; l<nzA; l++){ 2137 vals[lwrite] = aworkA[l]; 2138 cols[lwrite++] = cstart + cworkA[l]; 2139 } 2140 for (l=0; l<nzB; l++) { 2141 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2142 vals[lwrite] = aworkB[l]; 2143 cols[lwrite++] = ctmp; 2144 } 2145 } 2146 vals += ncols; 2147 cols += ncols; 2148 rptr[i+1] = rptr[i] + ncols; 2149 if (rownz_max < ncols) rownz_max = ncols; 2150 } 2151 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2152 } else { /* only copy matrix values into sbuf_a */ 2153 rptr = sbuf_j; 2154 vals = sbuf_a; 2155 rptr[0] = 0; 2156 for (i=0; i<rend-rstart; i++){ 2157 row = i + rstart; 2158 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2159 ncols = nzA + nzB; 2160 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2161 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2162 lwrite = 0; 2163 for (l=0; l<nzB; l++) { 2164 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2165 } 2166 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2167 for (l=0; l<nzB; l++) { 
2168 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2169 } 2170 vals += ncols; 2171 rptr[i+1] = rptr[i] + ncols; 2172 } 2173 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2174 2175 /* send nzlocal to others, and recv other's nzlocal */ 2176 /*--------------------------------------------------*/ 2177 if (reuse == MAT_INITIAL_MATRIX){ 2178 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2179 s_waits2 = s_waits3 + nsends; 2180 s_waits1 = s_waits2 + nsends; 2181 r_waits1 = s_waits1 + nsends; 2182 r_waits2 = r_waits1 + nrecvs; 2183 r_waits3 = r_waits2 + nrecvs; 2184 } else { 2185 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2186 r_waits3 = s_waits3 + nsends; 2187 } 2188 2189 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2190 if (reuse == MAT_INITIAL_MATRIX){ 2191 /* get new tags to keep the communication clean */ 2192 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2193 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2194 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2195 rbuf_nz = sbuf_nz + nsends; 2196 2197 /* post receives of other's nzlocal */ 2198 for (i=0; i<nrecvs; i++){ 2199 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2200 } 2201 /* send nzlocal to others */ 2202 for (i=0; i<nsends; i++){ 2203 sbuf_nz[i] = nzlocal; 2204 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2205 } 2206 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2207 count = nrecvs; 2208 while (count) { 2209 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2210 recv_rank[imdex] = recv_status.MPI_SOURCE; 2211 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2212 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2213 2214 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2215 rbuf_nz[imdex] += i + 2; 2216 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2217 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2218 count--; 2219 } 2220 /* wait on sends of nzlocal */ 2221 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2222 /* send mat->i,j to others, and recv from other's */ 2223 /*------------------------------------------------*/ 2224 for (i=0; i<nsends; i++){ 2225 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2226 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2227 } 2228 /* wait on receives of mat->i,j */ 2229 /*------------------------------*/ 2230 count = nrecvs; 2231 while (count) { 2232 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2233 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2234 count--; 2235 } 2236 /* wait on sends of mat->i,j */ 2237 /*---------------------------*/ 2238 if (nsends) { 2239 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2240 } 2241 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2242 2243 /* post receives, send and receive mat->a */ 2244 /*----------------------------------------*/ 
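/*
   The exchange of mat->a values that follows reuses the nonblocking pattern already used
   above for nzlocal and for the mat->i,j arrays, i.e. (using only the buffers declared in
   this routine):
     - post one MPI_Irecv() per expected sender into rbuf_a[imdex] with tag3,
     - post one MPI_Isend() of sbuf_a per destination rank in send_rank[] with tag3,
     - drain the receives with MPI_Waitany(), checking that MPI_SOURCE matches recv_rank[],
     - finish with MPI_Waitall() on the outstanding sends.
*/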
2245 for (imdex=0; imdex<nrecvs; imdex++) { 2246 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2247 } 2248 for (i=0; i<nsends; i++){ 2249 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2250 } 2251 count = nrecvs; 2252 while (count) { 2253 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2254 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2255 count--; 2256 } 2257 if (nsends) { 2258 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2259 } 2260 2261 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2262 2263 /* create redundant matrix */ 2264 /*-------------------------*/ 2265 if (reuse == MAT_INITIAL_MATRIX){ 2266 /* compute rownz_max for preallocation */ 2267 for (imdex=0; imdex<nrecvs; imdex++){ 2268 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2269 rptr = rbuf_j[imdex]; 2270 for (i=0; i<j; i++){ 2271 ncols = rptr[i+1] - rptr[i]; 2272 if (rownz_max < ncols) rownz_max = ncols; 2273 } 2274 } 2275 2276 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2277 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2278 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2279 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2280 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2281 } else { 2282 C = *matredundant; 2283 } 2284 2285 /* insert local matrix entries */ 2286 rptr = sbuf_j; 2287 cols = sbuf_j + rend-rstart + 1; 2288 vals = sbuf_a; 2289 for (i=0; i<rend-rstart; i++){ 2290 row = i + rstart; 2291 ncols = rptr[i+1] - rptr[i]; 2292 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2293 vals += ncols; 2294 cols += ncols; 2295 } 2296 /* insert received matrix entries */ 2297 for (imdex=0; imdex<nrecvs; imdex++){ 2298 rstart = rowrange[recv_rank[imdex]]; 2299 rend = rowrange[recv_rank[imdex]+1]; 2300 rptr = rbuf_j[imdex]; 2301 cols = rbuf_j[imdex] + rend-rstart + 1; 2302 vals = rbuf_a[imdex]; 2303 for (i=0; i<rend-rstart; i++){ 2304 row = i + rstart; 2305 ncols = rptr[i+1] - rptr[i]; 2306 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2307 vals += ncols; 2308 cols += ncols; 2309 } 2310 } 2311 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2312 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2313 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2314 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2315 if (reuse == MAT_INITIAL_MATRIX){ 2316 PetscContainer container; 2317 *matredundant = C; 2318 /* create a supporting struct and attach it to C for reuse */ 2319 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2320 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2321 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2322 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2323 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2324 2325 redund->nzlocal = nzlocal; 2326 redund->nsends = nsends; 2327 redund->nrecvs = nrecvs; 2328 redund->send_rank = send_rank; 2329 redund->sbuf_nz = sbuf_nz; 2330 redund->sbuf_j = sbuf_j; 2331 redund->sbuf_a = sbuf_a; 2332 redund->rbuf_j = 
rbuf_j; 2333 redund->rbuf_a = rbuf_a; 2334 2335 redund->MatDestroy = C->ops->destroy; 2336 C->ops->destroy = MatDestroy_MatRedundant; 2337 } 2338 PetscFunctionReturn(0); 2339 } 2340 2341 #undef __FUNCT__ 2342 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2343 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2344 { 2345 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2346 PetscErrorCode ierr; 2347 PetscInt i,*idxb = 0; 2348 PetscScalar *va,*vb; 2349 Vec vtmp; 2350 2351 PetscFunctionBegin; 2352 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2353 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2354 if (idx) { 2355 for (i=0; i<A->rmap->n; i++) { 2356 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2357 } 2358 } 2359 2360 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2361 if (idx) { 2362 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2363 } 2364 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2365 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2366 2367 for (i=0; i<A->rmap->n; i++){ 2368 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2369 va[i] = vb[i]; 2370 if (idx) idx[i] = a->garray[idxb[i]]; 2371 } 2372 } 2373 2374 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2375 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2376 if (idxb) { 2377 ierr = PetscFree(idxb);CHKERRQ(ierr); 2378 } 2379 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2380 PetscFunctionReturn(0); 2381 } 2382 2383 #undef __FUNCT__ 2384 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2385 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2386 { 2387 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2388 PetscErrorCode ierr; 2389 PetscInt i,*idxb = 0; 2390 PetscScalar *va,*vb; 2391 Vec vtmp; 2392 2393 PetscFunctionBegin; 2394 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2395 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2396 if (idx) { 2397 for (i=0; i<A->rmap->n; i++) { 2398 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2399 } 2400 } 2401 2402 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2403 if (idx) { 2404 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2405 } 2406 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2407 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2408 2409 for (i=0; i<A->rmap->n; i++){ 2410 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2411 va[i] = vb[i]; 2412 if (idx) idx[i] = a->garray[idxb[i]]; 2413 } 2414 } 2415 2416 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2417 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2418 if (idxb) { 2419 ierr = PetscFree(idxb);CHKERRQ(ierr); 2420 } 2421 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2422 PetscFunctionReturn(0); 2423 } 2424 2425 #undef __FUNCT__ 2426 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2427 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2428 { 2429 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2430 PetscInt n = A->rmap->n; 2431 PetscInt cstart = A->cmap->rstart; 2432 PetscInt *cmap = mat->garray; 2433 PetscInt *diagIdx, *offdiagIdx; 2434 Vec diagV, offdiagV; 2435 PetscScalar *a, *diagA, *offdiagA; 2436 PetscInt r; 2437 PetscErrorCode ierr; 2438 2439 PetscFunctionBegin; 2440 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2441 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2442 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2443 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2444 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2445
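/*
   Merge the per-block results computed above: for each local row keep the diagonal-block
   entry when its magnitude is no larger than the off-diagonal one, otherwise keep the
   off-diagonal entry, and translate the winning local column index into a global index
   (cstart + diagIdx[r] for the diagonal block, cmap[offdiagIdx[r]], i.e. garray, for the
   off-diagonal block).
*/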
ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2446 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2447 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2448 for(r = 0; r < n; ++r) { 2449 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2450 a[r] = diagA[r]; 2451 idx[r] = cstart + diagIdx[r]; 2452 } else { 2453 a[r] = offdiagA[r]; 2454 idx[r] = cmap[offdiagIdx[r]]; 2455 } 2456 } 2457 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2458 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2459 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2460 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2461 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2462 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2463 PetscFunctionReturn(0); 2464 } 2465 2466 #undef __FUNCT__ 2467 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2468 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2469 { 2470 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2471 PetscInt n = A->rmap->n; 2472 PetscInt cstart = A->cmap->rstart; 2473 PetscInt *cmap = mat->garray; 2474 PetscInt *diagIdx, *offdiagIdx; 2475 Vec diagV, offdiagV; 2476 PetscScalar *a, *diagA, *offdiagA; 2477 PetscInt r; 2478 PetscErrorCode ierr; 2479 2480 PetscFunctionBegin; 2481 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2482 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2483 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2484 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2485 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2486 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2487 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2488 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2489 for(r = 0; r < n; ++r) { 2490 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2491 a[r] = diagA[r]; 2492 idx[r] = cstart + diagIdx[r]; 2493 } else { 2494 a[r] = offdiagA[r]; 2495 idx[r] = cmap[offdiagIdx[r]]; 2496 } 2497 } 2498 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2499 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2500 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2501 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2502 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2503 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2504 PetscFunctionReturn(0); 2505 } 2506 2507 #undef __FUNCT__ 2508 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2509 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[]) 2510 { 2511 PetscErrorCode ierr; 2512 2513 PetscFunctionBegin; 2514 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 2515 PetscFunctionReturn(0); 2516 } 2517 2518 /* -------------------------------------------------------------------*/ 2519 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2520 MatGetRow_MPIAIJ, 2521 MatRestoreRow_MPIAIJ, 2522 MatMult_MPIAIJ, 2523 /* 4*/ MatMultAdd_MPIAIJ, 2524 MatMultTranspose_MPIAIJ, 2525 MatMultTransposeAdd_MPIAIJ, 2526 #ifdef PETSC_HAVE_PBGL 2527 MatSolve_MPIAIJ, 2528 #else 2529 0, 2530 #endif 2531 0, 2532 0, 2533 /*10*/ 0, 2534 0, 2535 0, 2536 MatRelax_MPIAIJ, 2537 MatTranspose_MPIAIJ, 2538 /*15*/ MatGetInfo_MPIAIJ, 2539 MatEqual_MPIAIJ, 2540 MatGetDiagonal_MPIAIJ, 2541 MatDiagonalScale_MPIAIJ, 2542 MatNorm_MPIAIJ, 2543 /*20*/ MatAssemblyBegin_MPIAIJ, 2544 MatAssemblyEnd_MPIAIJ, 2545 0, 2546 MatSetOption_MPIAIJ, 2547 MatZeroEntries_MPIAIJ, 2548 /*25*/ MatZeroRows_MPIAIJ, 2549 0, 2550 #ifdef PETSC_HAVE_PBGL
2551 0, 2552 #else 2553 0, 2554 #endif 2555 0, 2556 0, 2557 /*30*/ MatSetUpPreallocation_MPIAIJ, 2558 #ifdef PETSC_HAVE_PBGL 2559 0, 2560 #else 2561 0, 2562 #endif 2563 0, 2564 0, 2565 0, 2566 /*35*/ MatDuplicate_MPIAIJ, 2567 0, 2568 0, 2569 0, 2570 0, 2571 /*40*/ MatAXPY_MPIAIJ, 2572 MatGetSubMatrices_MPIAIJ, 2573 MatIncreaseOverlap_MPIAIJ, 2574 MatGetValues_MPIAIJ, 2575 MatCopy_MPIAIJ, 2576 /*45*/ MatGetRowMax_MPIAIJ, 2577 MatScale_MPIAIJ, 2578 0, 2579 0, 2580 0, 2581 /*50*/ MatSetBlockSize_MPIAIJ, 2582 0, 2583 0, 2584 0, 2585 0, 2586 /*55*/ MatFDColoringCreate_MPIAIJ, 2587 0, 2588 MatSetUnfactored_MPIAIJ, 2589 MatPermute_MPIAIJ, 2590 0, 2591 /*60*/ MatGetSubMatrix_MPIAIJ, 2592 MatDestroy_MPIAIJ, 2593 MatView_MPIAIJ, 2594 0, 2595 0, 2596 /*65*/ 0, 2597 0, 2598 0, 2599 0, 2600 0, 2601 /*70*/ MatGetRowMaxAbs_MPIAIJ, 2602 MatGetRowMinAbs_MPIAIJ, 2603 0, 2604 MatSetColoring_MPIAIJ, 2605 #if defined(PETSC_HAVE_ADIC) 2606 MatSetValuesAdic_MPIAIJ, 2607 #else 2608 0, 2609 #endif 2610 MatSetValuesAdifor_MPIAIJ, 2611 /*75*/ 0, 2612 0, 2613 0, 2614 0, 2615 0, 2616 /*80*/ 0, 2617 0, 2618 0, 2619 /*84*/ MatLoad_MPIAIJ, 2620 0, 2621 0, 2622 0, 2623 0, 2624 0, 2625 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 2626 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2627 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2628 MatPtAP_Basic, 2629 MatPtAPSymbolic_MPIAIJ, 2630 /*95*/ MatPtAPNumeric_MPIAIJ, 2631 0, 2632 0, 2633 0, 2634 0, 2635 /*100*/0, 2636 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2637 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2638 MatConjugate_MPIAIJ, 2639 0, 2640 /*105*/MatSetValuesRow_MPIAIJ, 2641 MatRealPart_MPIAIJ, 2642 MatImaginaryPart_MPIAIJ, 2643 0, 2644 0, 2645 /*110*/0, 2646 MatGetRedundantMatrix_MPIAIJ, 2647 MatGetRowMin_MPIAIJ, 2648 0, 2649 0, 2650 /*115*/MatGetSeqNonzerostructure_MPIAIJ}; 2651 2652 /* ----------------------------------------------------------------------------------------*/ 2653 2654 EXTERN_C_BEGIN 2655 #undef __FUNCT__ 2656 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2657 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2658 { 2659 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2660 PetscErrorCode ierr; 2661 2662 PetscFunctionBegin; 2663 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2664 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2665 PetscFunctionReturn(0); 2666 } 2667 EXTERN_C_END 2668 2669 EXTERN_C_BEGIN 2670 #undef __FUNCT__ 2671 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2672 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2673 { 2674 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2675 PetscErrorCode ierr; 2676 2677 PetscFunctionBegin; 2678 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2679 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2680 PetscFunctionReturn(0); 2681 } 2682 EXTERN_C_END 2683 2684 #include "petscpc.h" 2685 EXTERN_C_BEGIN 2686 #undef __FUNCT__ 2687 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2688 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2689 { 2690 Mat_MPIAIJ *b; 2691 PetscErrorCode ierr; 2692 PetscInt i; 2693 2694 PetscFunctionBegin; 2695 B->preallocated = PETSC_TRUE; 2696 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2697 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2698 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2699 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2700 2701 B->rmap->bs = B->cmap->bs = 1; 2702 ierr = 
PetscMapSetUp(B->rmap);CHKERRQ(ierr); 2703 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 2704 if (d_nnz) { 2705 for (i=0; i<B->rmap->n; i++) { 2706 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2707 } 2708 } 2709 if (o_nnz) { 2710 for (i=0; i<B->rmap->n; i++) { 2711 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2712 } 2713 } 2714 b = (Mat_MPIAIJ*)B->data; 2715 2716 /* Explicitly create 2 MATSEQAIJ matrices. */ 2717 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2718 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2719 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2720 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2721 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2722 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2723 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2724 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2725 2726 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2727 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2728 2729 PetscFunctionReturn(0); 2730 } 2731 EXTERN_C_END 2732 2733 #undef __FUNCT__ 2734 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2735 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2736 { 2737 Mat mat; 2738 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2739 PetscErrorCode ierr; 2740 2741 PetscFunctionBegin; 2742 *newmat = 0; 2743 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2744 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2745 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2746 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2747 a = (Mat_MPIAIJ*)mat->data; 2748 2749 mat->factor = matin->factor; 2750 mat->rmap->bs = matin->rmap->bs; 2751 mat->assembled = PETSC_TRUE; 2752 mat->insertmode = NOT_SET_VALUES; 2753 mat->preallocated = PETSC_TRUE; 2754 2755 a->size = oldmat->size; 2756 a->rank = oldmat->rank; 2757 a->donotstash = oldmat->donotstash; 2758 a->roworiented = oldmat->roworiented; 2759 a->rowindices = 0; 2760 a->rowvalues = 0; 2761 a->getrowactive = PETSC_FALSE; 2762 2763 ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr); 2764 ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr); 2765 2766 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr); 2767 if (oldmat->colmap) { 2768 #if defined (PETSC_USE_CTABLE) 2769 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2770 #else 2771 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2772 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2773 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2774 #endif 2775 } else a->colmap = 0; 2776 if (oldmat->garray) { 2777 PetscInt len; 2778 len = oldmat->B->cmap->n; 2779 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2780 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2781 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2782 } else a->garray = 0; 2783 2784 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2785 ierr = 
PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2786 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2787 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2788 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2789 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2790 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2791 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2792 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2793 *newmat = mat; 2794 PetscFunctionReturn(0); 2795 } 2796 2797 #include "petscsys.h" 2798 2799 #undef __FUNCT__ 2800 #define __FUNCT__ "MatLoad_MPIAIJ" 2801 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 2802 { 2803 Mat A; 2804 PetscScalar *vals,*svals; 2805 MPI_Comm comm = ((PetscObject)viewer)->comm; 2806 MPI_Status status; 2807 PetscErrorCode ierr; 2808 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 2809 PetscInt i,nz,j,rstart,rend,mmax; 2810 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2811 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2812 PetscInt cend,cstart,n,*rowners; 2813 int fd; 2814 2815 PetscFunctionBegin; 2816 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2817 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2818 if (!rank) { 2819 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2820 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2821 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2822 } 2823 2824 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2825 M = header[1]; N = header[2]; 2826 /* determine ownership of all rows */ 2827 m = M/size + ((M % size) > rank); 2828 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2829 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2830 2831 /* First process needs enough room for process with most rows */ 2832 if (!rank) { 2833 mmax = rowners[1]; 2834 for (i=2; i<size; i++) { 2835 mmax = PetscMax(mmax,rowners[i]); 2836 } 2837 } else mmax = m; 2838 2839 rowners[0] = 0; 2840 for (i=2; i<=size; i++) { 2841 rowners[i] += rowners[i-1]; 2842 } 2843 rstart = rowners[rank]; 2844 rend = rowners[rank+1]; 2845 2846 /* distribute row lengths to all processors */ 2847 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2848 if (!rank) { 2849 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2850 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2851 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2852 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2853 for (j=0; j<m; j++) { 2854 procsnz[0] += ourlens[j]; 2855 } 2856 for (i=1; i<size; i++) { 2857 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2858 /* calculate the number of nonzeros on each processor */ 2859 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2860 procsnz[i] += rowlengths[j]; 2861 } 2862 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2863 } 2864 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2865 } else { 2866 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2867 } 2868 2869 if (!rank) { 2870 /* determine max buffer needed and allocate it */ 2871 maxnz = 0; 2872 for (i=0; i<size; i++) { 2873 maxnz = PetscMax(maxnz,procsnz[i]); 2874 } 2875 ierr = 
PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2876 2877 /* read in my part of the matrix column indices */ 2878 nz = procsnz[0]; 2879 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2880 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2881 2882 /* read in every one elses and ship off */ 2883 for (i=1; i<size; i++) { 2884 nz = procsnz[i]; 2885 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2886 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2887 } 2888 ierr = PetscFree(cols);CHKERRQ(ierr); 2889 } else { 2890 /* determine buffer space needed for message */ 2891 nz = 0; 2892 for (i=0; i<m; i++) { 2893 nz += ourlens[i]; 2894 } 2895 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2896 2897 /* receive message of column indices*/ 2898 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2899 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2900 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2901 } 2902 2903 /* determine column ownership if matrix is not square */ 2904 if (N != M) { 2905 n = N/size + ((N % size) > rank); 2906 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2907 cstart = cend - n; 2908 } else { 2909 cstart = rstart; 2910 cend = rend; 2911 n = cend - cstart; 2912 } 2913 2914 /* loop over local rows, determining number of off diagonal entries */ 2915 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2916 jj = 0; 2917 for (i=0; i<m; i++) { 2918 for (j=0; j<ourlens[i]; j++) { 2919 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2920 jj++; 2921 } 2922 } 2923 2924 /* create our matrix */ 2925 for (i=0; i<m; i++) { 2926 ourlens[i] -= offlens[i]; 2927 } 2928 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2929 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2930 ierr = MatSetType(A,type);CHKERRQ(ierr); 2931 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2932 2933 for (i=0; i<m; i++) { 2934 ourlens[i] += offlens[i]; 2935 } 2936 2937 if (!rank) { 2938 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2939 2940 /* read in my part of the matrix numerical values */ 2941 nz = procsnz[0]; 2942 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2943 2944 /* insert into matrix */ 2945 jj = rstart; 2946 smycols = mycols; 2947 svals = vals; 2948 for (i=0; i<m; i++) { 2949 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2950 smycols += ourlens[i]; 2951 svals += ourlens[i]; 2952 jj++; 2953 } 2954 2955 /* read in other processors and ship out */ 2956 for (i=1; i<size; i++) { 2957 nz = procsnz[i]; 2958 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2959 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 2960 } 2961 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2962 } else { 2963 /* receive numeric values */ 2964 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2965 2966 /* receive message of values*/ 2967 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 2968 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2969 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2970 2971 /* insert into matrix */ 2972 jj = rstart; 2973 smycols = mycols; 2974 svals = vals; 2975 for (i=0; i<m; i++) { 2976 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2977 
smycols += ourlens[i]; 2978 svals += ourlens[i]; 2979 jj++; 2980 } 2981 } 2982 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2983 ierr = PetscFree(vals);CHKERRQ(ierr); 2984 ierr = PetscFree(mycols);CHKERRQ(ierr); 2985 ierr = PetscFree(rowners);CHKERRQ(ierr); 2986 2987 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2988 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2989 *newmat = A; 2990 PetscFunctionReturn(0); 2991 } 2992 2993 #undef __FUNCT__ 2994 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2995 /* 2996 Not great since it makes two copies of the submatrix, first an SeqAIJ 2997 in local and then by concatenating the local matrices the end result. 2998 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2999 */ 3000 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3001 { 3002 PetscErrorCode ierr; 3003 PetscMPIInt rank,size; 3004 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3005 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3006 Mat *local,M,Mreuse; 3007 MatScalar *vwork,*aa; 3008 MPI_Comm comm = ((PetscObject)mat)->comm; 3009 Mat_SeqAIJ *aij; 3010 3011 3012 PetscFunctionBegin; 3013 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3014 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3015 3016 if (call == MAT_REUSE_MATRIX) { 3017 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3018 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3019 local = &Mreuse; 3020 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3021 } else { 3022 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3023 Mreuse = *local; 3024 ierr = PetscFree(local);CHKERRQ(ierr); 3025 } 3026 3027 /* 3028 m - number of local rows 3029 n - number of columns (same on all processors) 3030 rstart - first row in new global matrix generated 3031 */ 3032 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3033 if (call == MAT_INITIAL_MATRIX) { 3034 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3035 ii = aij->i; 3036 jj = aij->j; 3037 3038 /* 3039 Determine the number of non-zeros in the diagonal and off-diagonal 3040 portions of the matrix in order to do correct preallocation 3041 */ 3042 3043 /* first get start and end of "diagonal" columns */ 3044 if (csize == PETSC_DECIDE) { 3045 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3046 if (mglobal == n) { /* square matrix */ 3047 nlocal = m; 3048 } else { 3049 nlocal = n/size + ((n % size) > rank); 3050 } 3051 } else { 3052 nlocal = csize; 3053 } 3054 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3055 rstart = rend - nlocal; 3056 if (rank == size - 1 && rend != n) { 3057 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3058 } 3059 3060 /* next, compute all the lengths */ 3061 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3062 olens = dlens + m; 3063 for (i=0; i<m; i++) { 3064 jend = ii[i+1] - ii[i]; 3065 olen = 0; 3066 dlen = 0; 3067 for (j=0; j<jend; j++) { 3068 if (*jj < rstart || *jj >= rend) olen++; 3069 else dlen++; 3070 jj++; 3071 } 3072 olens[i] = olen; 3073 dlens[i] = dlen; 3074 } 3075 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3076 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3077 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3078 ierr = 
MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3079 ierr = PetscFree(dlens);CHKERRQ(ierr); 3080 } else { 3081 PetscInt ml,nl; 3082 3083 M = *newmat; 3084 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3085 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3086 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3087 /* 3088 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3089 rather than the slower MatSetValues(). 3090 */ 3091 M->was_assembled = PETSC_TRUE; 3092 M->assembled = PETSC_FALSE; 3093 } 3094 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3095 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3096 ii = aij->i; 3097 jj = aij->j; 3098 aa = aij->a; 3099 for (i=0; i<m; i++) { 3100 row = rstart + i; 3101 nz = ii[i+1] - ii[i]; 3102 cwork = jj; jj += nz; 3103 vwork = aa; aa += nz; 3104 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3105 } 3106 3107 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3108 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3109 *newmat = M; 3110 3111 /* save submatrix used in processor for next request */ 3112 if (call == MAT_INITIAL_MATRIX) { 3113 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3114 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 3115 } 3116 3117 PetscFunctionReturn(0); 3118 } 3119 3120 EXTERN_C_BEGIN 3121 #undef __FUNCT__ 3122 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3123 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3124 { 3125 PetscInt m,cstart, cend,j,nnz,i,d; 3126 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3127 const PetscInt *JJ; 3128 PetscScalar *values; 3129 PetscErrorCode ierr; 3130 3131 PetscFunctionBegin; 3132 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3133 3134 B->rmap->bs = B->cmap->bs = 1; 3135 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 3136 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 3137 m = B->rmap->n; 3138 cstart = B->cmap->rstart; 3139 cend = B->cmap->rend; 3140 rstart = B->rmap->rstart; 3141 3142 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 3143 o_nnz = d_nnz + m; 3144 3145 #if defined(PETSC_USE_DEBUG) 3146 for (i=0; i<m; i++) { 3147 nnz = Ii[i+1]- Ii[i]; 3148 JJ = J + Ii[i]; 3149 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3150 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3151 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3152 for (j=1; j<nnz; j++) { 3153 if (JJ[j] <= JJ[j-1]) SETERRQ2(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at %D location in column indices",i,j); 3154 } 3155 } 3156 #endif 3157 3158 for (i=0; i<m; i++) { 3159 nnz = Ii[i+1]- Ii[i]; 3160 JJ = J + Ii[i]; 3161 nnz_max = PetscMax(nnz_max,nnz); 3162 for (j=0; j<nnz; j++) { 3163 if (*JJ >= cstart) break; 3164 JJ++; 3165 } 3166 d = 0; 3167 for (; j<nnz; j++) { 3168 if (*JJ++ >= cend) break; 3169 d++; 3170 } 3171 d_nnz[i] = d; 3172 o_nnz[i] = nnz - d; 3173 } 3174 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3175 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 3176 3177 if (v) values = (PetscScalar*)v; 3178 else { 3179 ierr =
PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3180 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3181 } 3182 3183 for (i=0; i<m; i++) { 3184 ii = i + rstart; 3185 nnz = Ii[i+1]- Ii[i]; 3186 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3187 } 3188 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3189 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3190 3191 if (!v) { 3192 ierr = PetscFree(values);CHKERRQ(ierr); 3193 } 3194 PetscFunctionReturn(0); 3195 } 3196 EXTERN_C_END 3197 3198 #undef __FUNCT__ 3199 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3200 /*@ 3201 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3202 (the default parallel PETSc format). 3203 3204 Collective on MPI_Comm 3205 3206 Input Parameters: 3207 + B - the matrix 3208 . i - the indices into j for the start of each local row (starts with zero) 3209 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3210 - v - optional values in the matrix 3211 3212 Level: developer 3213 3214 Notes: 3215 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3216 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3217 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3218 3219 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3220 3221 The format which is used for the sparse matrix input, is equivalent to a 3222 row-major ordering.. i.e for the following matrix, the input data expected is 3223 as shown: 3224 3225 1 0 0 3226 2 0 3 P0 3227 ------- 3228 4 5 6 P1 3229 3230 Process0 [P0]: rows_owned=[0,1] 3231 i = {0,1,3} [size = nrow+1 = 2+1] 3232 j = {0,0,2} [size = nz = 6] 3233 v = {1,2,3} [size = nz = 6] 3234 3235 Process1 [P1]: rows_owned=[2] 3236 i = {0,3} [size = nrow+1 = 1+1] 3237 j = {0,1,2} [size = nz = 6] 3238 v = {4,5,6} [size = nz = 6] 3239 3240 The column indices for each row MUST be sorted. 3241 3242 .keywords: matrix, aij, compressed row, sparse, parallel 3243 3244 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3245 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3246 @*/ 3247 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3248 { 3249 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 3250 3251 PetscFunctionBegin; 3252 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3253 if (f) { 3254 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 3255 } 3256 PetscFunctionReturn(0); 3257 } 3258 3259 #undef __FUNCT__ 3260 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3261 /*@C 3262 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3263 (the default parallel PETSc format). For good matrix assembly performance 3264 the user should preallocate the matrix storage by setting the parameters 3265 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3266 performance can be increased by more than a factor of 50. 3267 3268 Collective on MPI_Comm 3269 3270 Input Parameters: 3271 + A - the matrix 3272 . 
d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3273 (same value is used for all local rows) 3274 . d_nnz - array containing the number of nonzeros in the various rows of the 3275 DIAGONAL portion of the local submatrix (possibly different for each row) 3276 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3277 The size of this array is equal to the number of local rows, i.e 'm'. 3278 You must leave room for the diagonal entry even if it is zero. 3279 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3280 submatrix (same value is used for all local rows). 3281 - o_nnz - array containing the number of nonzeros in the various rows of the 3282 OFF-DIAGONAL portion of the local submatrix (possibly different for 3283 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3284 structure. The size of this array is equal to the number 3285 of local rows, i.e 'm'. 3286 3287 If the *_nnz parameter is given then the *_nz parameter is ignored 3288 3289 The AIJ format (also called the Yale sparse matrix format or 3290 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3291 storage. The stored row and column indices begin with zero. See the users manual for details. 3292 3293 The parallel matrix is partitioned such that the first m0 rows belong to 3294 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3295 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3296 3297 The DIAGONAL portion of the local submatrix of a processor can be defined 3298 as the submatrix which is obtained by extraction the part corresponding 3299 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 3300 first row that belongs to the processor, and r2 is the last row belonging 3301 to the this processor. This is a square mxm matrix. The remaining portion 3302 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 3303 3304 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3305 3306 You can call MatGetInfo() to get information on how effective the preallocation was; 3307 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3308 You can also run with the option -info and look for messages with the string 3309 malloc in them to see if additional memory allocation was needed. 3310 3311 Example usage: 3312 3313 Consider the following 8x8 matrix with 34 non-zero values, that is 3314 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3315 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3316 as follows: 3317 3318 .vb 3319 1 2 0 | 0 3 0 | 0 4 3320 Proc0 0 5 6 | 7 0 0 | 8 0 3321 9 0 10 | 11 0 0 | 12 0 3322 ------------------------------------- 3323 13 0 14 | 15 16 17 | 0 0 3324 Proc1 0 18 0 | 19 20 21 | 0 0 3325 0 0 0 | 22 23 0 | 24 0 3326 ------------------------------------- 3327 Proc2 25 26 27 | 0 0 28 | 29 0 3328 30 0 0 | 31 32 33 | 0 34 3329 .ve 3330 3331 This can be represented as a collection of submatrices as: 3332 3333 .vb 3334 A B C 3335 D E F 3336 G H I 3337 .ve 3338 3339 Where the submatrices A,B,C are owned by proc0, D,E,F are 3340 owned by proc1, G,H,I are owned by proc2. 3341 3342 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3343 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3344 The 'M','N' parameters are 8,8, and have the same values on all procs. 3345 3346 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3347 submatrices [A], [E], [I] respectively. 
The OFF-DIAGONAL submatrices 3348 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3349 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3350 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 3351 matrix, and [DF] as another SeqAIJ matrix. 3352 3353 When d_nz, o_nz parameters are specified, d_nz storage elements are 3354 allocated for every row of the local diagonal submatrix, and o_nz 3355 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3356 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 3357 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3358 In this case, the values of d_nz,o_nz are: 3359 .vb 3360 proc0 : d_nz = 2, o_nz = 2 3361 proc1 : d_nz = 3, o_nz = 2 3362 proc2 : d_nz = 1, o_nz = 4 3363 .ve 3364 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3365 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3366 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3367 34 values. 3368 3369 When d_nnz, o_nnz parameters are specified, the storage is specified 3370 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3371 In the above case the values for d_nnz,o_nnz are: 3372 .vb 3373 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3374 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3375 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3376 .ve 3377 Here the space allocated is the sum of all the above values, i.e. 34, and 3378 hence pre-allocation is perfect. 3379 3380 Level: intermediate 3381 3382 .keywords: matrix, aij, compressed row, sparse, parallel 3383 3384 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3385 MPIAIJ, MatGetInfo() 3386 @*/ 3387 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3388 { 3389 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3390 3391 PetscFunctionBegin; 3392 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3393 if (f) { 3394 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3395 } 3396 PetscFunctionReturn(0); 3397 } 3398 3399 #undef __FUNCT__ 3400 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3401 /*@ 3402 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard 3403 CSR format. 3404 3405 Collective on MPI_Comm 3406 3407 Input Parameters: 3408 + comm - MPI communicator 3409 . m - number of local rows (Cannot be PETSC_DECIDE) 3410 . n - This value should be the same as the local size used in creating the 3411 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3412 calculated if N is given) For square matrices n is almost always m. 3413 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3414 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3415 . i - row indices 3416 . j - column indices 3417 - a - matrix values 3418 3419 Output Parameter: 3420 . mat - the matrix 3421 3422 Level: intermediate 3423 3424 Notes: 3425 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3426 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3427 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
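   For illustration only (not part of this routine's interface), process 0 of the two-process example laid out below could build the matrix with a call such as the following minimal sketch, where the arrays hold the P0 data listed below:
.vb
      Mat         A;
      PetscInt    i[] = {0,1,3};          /* row offsets for the 2 local rows of P0 */
      PetscInt    j[] = {0,0,2};          /* 0-based column indices */
      PetscScalar a[] = {1.0,2.0,3.0};
      ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,a,&A);CHKERRQ(ierr);
.ve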
3428 3429 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3430 3431 The format which is used for the sparse matrix input, is equivalent to a 3432 row-major ordering.. i.e for the following matrix, the input data expected is 3433 as shown: 3434 3435 1 0 0 3436 2 0 3 P0 3437 ------- 3438 4 5 6 P1 3439 3440 Process0 [P0]: rows_owned=[0,1] 3441 i = {0,1,3} [size = nrow+1 = 2+1] 3442 j = {0,0,2} [size = nz = 6] 3443 v = {1,2,3} [size = nz = 6] 3444 3445 Process1 [P1]: rows_owned=[2] 3446 i = {0,3} [size = nrow+1 = 1+1] 3447 j = {0,1,2} [size = nz = 6] 3448 v = {4,5,6} [size = nz = 6] 3449 3450 .keywords: matrix, aij, compressed row, sparse, parallel 3451 3452 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3453 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3454 @*/ 3455 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3456 { 3457 PetscErrorCode ierr; 3458 3459 PetscFunctionBegin; 3460 if (i[0]) { 3461 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3462 } 3463 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3464 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3465 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3466 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3467 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3468 PetscFunctionReturn(0); 3469 } 3470 3471 #undef __FUNCT__ 3472 #define __FUNCT__ "MatCreateMPIAIJ" 3473 /*@C 3474 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3475 (the default parallel PETSc format). For good matrix assembly performance 3476 the user should preallocate the matrix storage by setting the parameters 3477 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3478 performance can be increased by more than a factor of 50. 3479 3480 Collective on MPI_Comm 3481 3482 Input Parameters: 3483 + comm - MPI communicator 3484 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3485 This value should be the same as the local size used in creating the 3486 y vector for the matrix-vector product y = Ax. 3487 . n - This value should be the same as the local size used in creating the 3488 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3489 calculated if N is given) For square matrices n is almost always m. 3490 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3491 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3492 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3493 (same value is used for all local rows) 3494 . d_nnz - array containing the number of nonzeros in the various rows of the 3495 DIAGONAL portion of the local submatrix (possibly different for each row) 3496 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3497 The size of this array is equal to the number of local rows, i.e 'm'. 3498 You must leave room for the diagonal entry even if it is zero. 3499 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3500 submatrix (same value is used for all local rows). 
3501 - o_nnz - array containing the number of nonzeros in the various rows of the 3502 OFF-DIAGONAL portion of the local submatrix (possibly different for 3503 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3504 structure. The size of this array is equal to the number 3505 of local rows, i.e 'm'. 3506 3507 Output Parameter: 3508 . A - the matrix 3509 3510 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3511 MatXXXXSetPreallocation() paradigm instead of this routine directly. This is definitely 3512 true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles. 3513 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()] 3514 3515 Notes: 3516 If the *_nnz parameter is given then the *_nz parameter is ignored. 3517 3518 The m,n,M,N parameters specify the size of the matrix, and its partitioning across 3519 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3520 storage requirements for this matrix. 3521 3522 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3523 processor then it must be used on all processors that share the object for 3524 that argument. 3525 3526 The user MUST specify either the local or global matrix dimensions 3527 (possibly both). 3528 3529 The parallel matrix is partitioned across processors such that the 3530 first m0 rows belong to process 0, the next m1 rows belong to 3531 process 1, the next m2 rows belong to process 2 etc., where 3532 m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores 3533 values corresponding to an [m x N] submatrix. 3534 3535 The columns are logically partitioned with the n0 columns belonging 3536 to the 0th partition, the next n1 columns belonging to the next 3537 partition etc., where n0,n1,n2... are the input parameter 'n'. 3538 3539 The DIAGONAL portion of the local submatrix on any given processor 3540 is the submatrix corresponding to the rows and columns m,n 3541 corresponding to the given processor, i.e. the diagonal matrix on 3542 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1] 3543 etc. The remaining portion of the local submatrix [m x (N-n)] 3544 constitutes the OFF-DIAGONAL portion. The example below better 3545 illustrates this concept. 3546 3547 For a square global matrix we define each processor's diagonal portion 3548 to be its local rows and the corresponding columns (a square submatrix); 3549 each processor's off-diagonal portion encompasses the remainder of the 3550 local matrix (a rectangular submatrix). 3551 3552 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3553 3554 When calling this routine with a single process communicator, a matrix of 3555 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3556 type of communicator, use the construction mechanism: 3557 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...); 3558 3559 By default, this format uses inodes (identical nodes) when possible. 3560 We search for consecutive rows with the same nonzero structure, thereby 3561 reusing matrix information to achieve increased efficiency. 3562 3563 Options Database Keys: 3564 + -mat_no_inode - Do not use inodes 3565 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3566 - -mat_aij_oneindex - Internally use indexing starting at 1 3567 rather than 0. Note that when calling MatSetValues(), 3568 the user still MUST index entries starting at 0!
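   A minimal sketch of the recommended MatCreate()/MatSetType()/MatXXXXSetPreallocation() paradigm mentioned above (the sizes m,n and the counts d_nz,o_nz here are placeholders supplied by the caller):
.vb
      Mat A;
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatSeqAIJSetPreallocation(A,d_nz,PETSC_NULL);CHKERRQ(ierr);   /* harmless no-op unless the type ends up SeqAIJ */
.ve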
3569 3570 3571 Example usage: 3572 3573 Consider the following 8x8 matrix with 34 non-zero values, that is 3574 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3575 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3576 as follows: 3577 3578 .vb 3579 1 2 0 | 0 3 0 | 0 4 3580 Proc0 0 5 6 | 7 0 0 | 8 0 3581 9 0 10 | 11 0 0 | 12 0 3582 ------------------------------------- 3583 13 0 14 | 15 16 17 | 0 0 3584 Proc1 0 18 0 | 19 20 21 | 0 0 3585 0 0 0 | 22 23 0 | 24 0 3586 ------------------------------------- 3587 Proc2 25 26 27 | 0 0 28 | 29 0 3588 30 0 0 | 31 32 33 | 0 34 3589 .ve 3590 3591 This can be represented as a collection of submatrices as: 3592 3593 .vb 3594 A B C 3595 D E F 3596 G H I 3597 .ve 3598 3599 Where the submatrices A,B,C are owned by proc0, D,E,F are 3600 owned by proc1, G,H,I are owned by proc2. 3601 3602 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3603 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3604 The 'M','N' parameters are 8,8, and have the same values on all procs. 3605 3606 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3607 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3608 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3609 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3610 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 3611 matrix, and [DF] as another SeqAIJ matrix. 3612 3613 When d_nz, o_nz parameters are specified, d_nz storage elements are 3614 allocated for every row of the local diagonal submatrix, and o_nz 3615 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3616 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 3617 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3618 In this case, the values of d_nz,o_nz are: 3619 .vb 3620 proc0 : d_nz = 2, o_nz = 2 3621 proc1 : d_nz = 3, o_nz = 2 3622 proc2 : d_nz = 1, o_nz = 4 3623 .ve 3624 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3625 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3626 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3627 34 values. 3628 3629 When d_nnz, o_nnz parameters are specified, the storage is specified 3630 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3631 In the above case the values for d_nnz,o_nnz are: 3632 .vb 3633 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3634 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3635 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3636 .ve 3637 Here the space allocated is the sum of all the above values, i.e. 34, and 3638 hence pre-allocation is perfect.
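   As a concrete sketch (for illustration only), process 1 could create its share of the 8x8 example above using the d_nnz/o_nnz values from the table:
.vb
      Mat      A;
      PetscInt d_nnz[] = {3,3,2},o_nnz[] = {2,1,1};   /* the proc1 row of the table above */
      ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve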
3639 3640 Level: intermediate 3641 3642 .keywords: matrix, aij, compressed row, sparse, parallel 3643 3644 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3645 MPIAIJ, MatCreateMPIAIJWithArrays() 3646 @*/ 3647 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3648 { 3649 PetscErrorCode ierr; 3650 PetscMPIInt size; 3651 3652 PetscFunctionBegin; 3653 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3654 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3655 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3656 if (size > 1) { 3657 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3658 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3659 } else { 3660 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3661 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3662 } 3663 PetscFunctionReturn(0); 3664 } 3665 3666 #undef __FUNCT__ 3667 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3668 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3669 { 3670 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3671 3672 PetscFunctionBegin; 3673 *Ad = a->A; 3674 *Ao = a->B; 3675 *colmap = a->garray; 3676 PetscFunctionReturn(0); 3677 } 3678 3679 #undef __FUNCT__ 3680 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3681 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3682 { 3683 PetscErrorCode ierr; 3684 PetscInt i; 3685 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3686 3687 PetscFunctionBegin; 3688 if (coloring->ctype == IS_COLORING_GLOBAL) { 3689 ISColoringValue *allcolors,*colors; 3690 ISColoring ocoloring; 3691 3692 /* set coloring for diagonal portion */ 3693 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3694 3695 /* set coloring for off-diagonal portion */ 3696 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3697 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3698 for (i=0; i<a->B->cmap->n; i++) { 3699 colors[i] = allcolors[a->garray[i]]; 3700 } 3701 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3702 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3703 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3704 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3705 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3706 ISColoringValue *colors; 3707 PetscInt *larray; 3708 ISColoring ocoloring; 3709 3710 /* set coloring for diagonal portion */ 3711 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3712 for (i=0; i<a->A->cmap->n; i++) { 3713 larray[i] = i + A->cmap->rstart; 3714 } 3715 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3716 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3717 for (i=0; i<a->A->cmap->n; i++) { 3718 colors[i] = coloring->colors[larray[i]]; 3719 } 3720 ierr = PetscFree(larray);CHKERRQ(ierr); 3721 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3722 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3723 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3724 3725 /* set coloring for off-diagonal portion */ 3726 ierr = 
PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3727 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3728 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3729 for (i=0; i<a->B->cmap->n; i++) { 3730 colors[i] = coloring->colors[larray[i]]; 3731 } 3732 ierr = PetscFree(larray);CHKERRQ(ierr); 3733 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3734 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3735 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3736 } else { 3737 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3738 } 3739 3740 PetscFunctionReturn(0); 3741 } 3742 3743 #if defined(PETSC_HAVE_ADIC) 3744 #undef __FUNCT__ 3745 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3746 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3747 { 3748 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3749 PetscErrorCode ierr; 3750 3751 PetscFunctionBegin; 3752 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3753 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3754 PetscFunctionReturn(0); 3755 } 3756 #endif 3757 3758 #undef __FUNCT__ 3759 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3760 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3761 { 3762 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3763 PetscErrorCode ierr; 3764 3765 PetscFunctionBegin; 3766 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3767 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3768 PetscFunctionReturn(0); 3769 } 3770 3771 #undef __FUNCT__ 3772 #define __FUNCT__ "MatMerge" 3773 /*@ 3774 MatMerge - Creates a single large PETSc matrix by concatinating sequential 3775 matrices from each processor 3776 3777 Collective on MPI_Comm 3778 3779 Input Parameters: 3780 + comm - the communicators the parallel matrix will live on 3781 . inmat - the input sequential matrices 3782 . n - number of local columns (or PETSC_DECIDE) 3783 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3784 3785 Output Parameter: 3786 . outmat - the parallel matrix generated 3787 3788 Level: advanced 3789 3790 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
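   A minimal usage sketch (for illustration; each process passes its own sequential matrix, and note that, as implemented below, the input matrix is destroyed inside this routine):
.vb
      Mat seqA,parA;   /* seqA previously built on each process, e.g. with MatCreateSeqAIJ() */
      ierr = MatMerge(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&parA);CHKERRQ(ierr);
.ve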
3791 3792 @*/ 3793 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3794 { 3795 PetscErrorCode ierr; 3796 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3797 PetscInt *indx; 3798 PetscScalar *values; 3799 3800 PetscFunctionBegin; 3801 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3802 if (scall == MAT_INITIAL_MATRIX){ 3803 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3804 if (n == PETSC_DECIDE){ 3805 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3806 } 3807 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3808 rstart -= m; 3809 3810 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3811 for (i=0;i<m;i++) { 3812 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3813 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3814 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3815 } 3816 /* This routine will ONLY return MPIAIJ type matrix */ 3817 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3818 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3819 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3820 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3821 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3822 3823 } else if (scall == MAT_REUSE_MATRIX){ 3824 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3825 } else { 3826 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3827 } 3828 3829 for (i=0;i<m;i++) { 3830 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3831 Ii = i + rstart; 3832 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3833 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3834 } 3835 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3836 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3837 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3838 3839 PetscFunctionReturn(0); 3840 } 3841 3842 #undef __FUNCT__ 3843 #define __FUNCT__ "MatFileSplit" 3844 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3845 { 3846 PetscErrorCode ierr; 3847 PetscMPIInt rank; 3848 PetscInt m,N,i,rstart,nnz; 3849 size_t len; 3850 const PetscInt *indx; 3851 PetscViewer out; 3852 char *name; 3853 Mat B; 3854 const PetscScalar *values; 3855 3856 PetscFunctionBegin; 3857 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3858 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3859 /* Should this be the type of the diagonal block of A? 
*/ 3860 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3861 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3862 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3863 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3864 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3865 for (i=0;i<m;i++) { 3866 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3867 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3868 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3869 } 3870 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3871 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3872 3873 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 3874 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3875 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3876 sprintf(name,"%s.%d",outfile,rank); 3877 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3878 ierr = PetscFree(name); 3879 ierr = MatView(B,out);CHKERRQ(ierr); 3880 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3881 ierr = MatDestroy(B);CHKERRQ(ierr); 3882 PetscFunctionReturn(0); 3883 } 3884 3885 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3886 #undef __FUNCT__ 3887 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3888 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3889 { 3890 PetscErrorCode ierr; 3891 Mat_Merge_SeqsToMPI *merge; 3892 PetscContainer container; 3893 3894 PetscFunctionBegin; 3895 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3896 if (container) { 3897 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3898 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3899 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3900 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3901 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3902 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3903 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3904 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3905 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3906 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3907 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3908 ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr); 3909 3910 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 3911 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3912 } 3913 ierr = PetscFree(merge);CHKERRQ(ierr); 3914 3915 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3916 PetscFunctionReturn(0); 3917 } 3918 3919 #include "src/mat/utils/freespace.h" 3920 #include "petscbt.h" 3921 3922 #undef __FUNCT__ 3923 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 3924 /*@C 3925 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 3926 matrices from each processor 3927 3928 Collective on MPI_Comm 3929 3930 Input Parameters: 3931 + comm - the communicators the parallel matrix will live on 3932 . seqmat - the input sequential matrices 3933 . m - number of local rows (or PETSC_DECIDE) 3934 . n - number of local columns (or PETSC_DECIDE) 3935 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3936 3937 Output Parameter: 3938 . mpimat - the parallel matrix generated 3939 3940 Level: advanced 3941 3942 Notes: 3943 The dimensions of the sequential matrix in each processor MUST be the same. 3944 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 3945 destroyed when mpimat is destroyed. 
Call PetscObjectQuery() to access seqmat. 3946 @*/ 3947 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 3948 { 3949 PetscErrorCode ierr; 3950 MPI_Comm comm=((PetscObject)mpimat)->comm; 3951 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3952 PetscMPIInt size,rank,taga,*len_s; 3953 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 3954 PetscInt proc,m; 3955 PetscInt **buf_ri,**buf_rj; 3956 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3957 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3958 MPI_Request *s_waits,*r_waits; 3959 MPI_Status *status; 3960 MatScalar *aa=a->a; 3961 MatScalar **abuf_r,*ba_i; 3962 Mat_Merge_SeqsToMPI *merge; 3963 PetscContainer container; 3964 3965 PetscFunctionBegin; 3966 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3967 3968 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3969 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3970 3971 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3972 if (container) { 3973 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3974 } 3975 bi = merge->bi; 3976 bj = merge->bj; 3977 buf_ri = merge->buf_ri; 3978 buf_rj = merge->buf_rj; 3979 3980 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3981 owners = merge->rowmap.range; 3982 len_s = merge->len_s; 3983 3984 /* send and recv matrix values */ 3985 /*-----------------------------*/ 3986 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3987 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3988 3989 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 3990 for (proc=0,k=0; proc<size; proc++){ 3991 if (!len_s[proc]) continue; 3992 i = owners[proc]; 3993 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 3994 k++; 3995 } 3996 3997 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 3998 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 3999 ierr = PetscFree(status);CHKERRQ(ierr); 4000 4001 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4002 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4003 4004 /* insert mat values of mpimat */ 4005 /*----------------------------*/ 4006 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4007 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 4008 nextrow = buf_ri_k + merge->nrecv; 4009 nextai = nextrow + merge->nrecv; 4010 4011 for (k=0; k<merge->nrecv; k++){ 4012 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4013 nrows = *(buf_ri_k[k]); 4014 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4015 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4016 } 4017 4018 /* set values of ba */ 4019 m = merge->rowmap.n; 4020 for (i=0; i<m; i++) { 4021 arow = owners[rank] + i; 4022 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4023 bnzi = bi[i+1] - bi[i]; 4024 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4025 4026 /* add local non-zero vals of this proc's seqmat into ba */ 4027 anzi = ai[arow+1] - ai[arow]; 4028 aj = a->j + ai[arow]; 4029 aa = a->a + ai[arow]; 4030 nextaj = 0; 4031 for (j=0; nextaj<anzi; j++){ 4032 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4033 ba_i[j] += aa[nextaj++]; 4034 } 4035 } 4036 4037 /* add 
received vals into ba */ 4038 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4039 /* i-th row */ 4040 if (i == *nextrow[k]) { 4041 anzi = *(nextai[k]+1) - *nextai[k]; 4042 aj = buf_rj[k] + *(nextai[k]); 4043 aa = abuf_r[k] + *(nextai[k]); 4044 nextaj = 0; 4045 for (j=0; nextaj<anzi; j++){ 4046 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4047 ba_i[j] += aa[nextaj++]; 4048 } 4049 } 4050 nextrow[k]++; nextai[k]++; 4051 } 4052 } 4053 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4054 } 4055 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4056 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4057 4058 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4059 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4060 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 4061 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4062 PetscFunctionReturn(0); 4063 } 4064 4065 #undef __FUNCT__ 4066 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4067 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4068 { 4069 PetscErrorCode ierr; 4070 Mat B_mpi; 4071 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4072 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4073 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4074 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4075 PetscInt len,proc,*dnz,*onz; 4076 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4077 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4078 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4079 MPI_Status *status; 4080 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4081 PetscBT lnkbt; 4082 Mat_Merge_SeqsToMPI *merge; 4083 PetscContainer container; 4084 4085 PetscFunctionBegin; 4086 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4087 4088 /* make sure it is a PETSc comm */ 4089 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4090 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4091 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4092 4093 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4094 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4095 4096 /* determine row ownership */ 4097 /*---------------------------------------------------------*/ 4098 ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 4099 merge->rowmap.n = m; 4100 merge->rowmap.N = M; 4101 merge->rowmap.bs = 1; 4102 ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr); 4103 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4104 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4105 4106 m = merge->rowmap.n; 4107 M = merge->rowmap.N; 4108 owners = merge->rowmap.range; 4109 4110 /* determine the number of messages to send, their lengths */ 4111 /*---------------------------------------------------------*/ 4112 len_s = merge->len_s; 4113 4114 len = 0; /* length of buf_si[] */ 4115 merge->nsend = 0; 4116 for (proc=0; proc<size; proc++){ 4117 len_si[proc] = 0; 4118 if (proc == rank){ 4119 len_s[proc] = 0; 4120 } else { 4121 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4122 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4123 } 4124 if (len_s[proc]) { 4125 merge->nsend++; 4126 nrows = 0; 4127 for (i=owners[proc]; i<owners[proc+1]; i++){ 4128 if (ai[i+1] > ai[i]) nrows++; 4129 } 4130 len_si[proc] = 2*(nrows+1); 4131 len += 
len_si[proc]; 4132 } 4133 } 4134 4135 /* determine the number and length of messages to receive for ij-structure */ 4136 /*-------------------------------------------------------------------------*/ 4137 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4138 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4139 4140 /* post the Irecv of j-structure */ 4141 /*-------------------------------*/ 4142 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4143 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4144 4145 /* post the Isend of j-structure */ 4146 /*--------------------------------*/ 4147 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 4148 sj_waits = si_waits + merge->nsend; 4149 4150 for (proc=0, k=0; proc<size; proc++){ 4151 if (!len_s[proc]) continue; 4152 i = owners[proc]; 4153 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4154 k++; 4155 } 4156 4157 /* receives and sends of j-structure are complete */ 4158 /*------------------------------------------------*/ 4159 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4160 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4161 4162 /* send and recv i-structure */ 4163 /*---------------------------*/ 4164 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4165 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4166 4167 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4168 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4169 for (proc=0,k=0; proc<size; proc++){ 4170 if (!len_s[proc]) continue; 4171 /* form outgoing message for i-structure: 4172 buf_si[0]: nrows to be sent 4173 [1:nrows]: row index (global) 4174 [nrows+1:2*nrows+1]: i-structure index 4175 */ 4176 /*-------------------------------------------*/ 4177 nrows = len_si[proc]/2 - 1; 4178 buf_si_i = buf_si + nrows+1; 4179 buf_si[0] = nrows; 4180 buf_si_i[0] = 0; 4181 nrows = 0; 4182 for (i=owners[proc]; i<owners[proc+1]; i++){ 4183 anzi = ai[i+1] - ai[i]; 4184 if (anzi) { 4185 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4186 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4187 nrows++; 4188 } 4189 } 4190 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4191 k++; 4192 buf_si += len_si[proc]; 4193 } 4194 4195 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4196 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4197 4198 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4199 for (i=0; i<merge->nrecv; i++){ 4200 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4201 } 4202 4203 ierr = PetscFree(len_si);CHKERRQ(ierr); 4204 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4205 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4206 ierr = PetscFree(si_waits);CHKERRQ(ierr); 4207 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4208 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4209 ierr = PetscFree(status);CHKERRQ(ierr); 4210 4211 /* compute a local seq matrix in each processor */ 4212 /*----------------------------------------------*/ 4213 /* allocate bi array and 
free space for accumulating nonzero column info */ 4214 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4215 bi[0] = 0; 4216 4217 /* create and initialize a linked list */ 4218 nlnk = N+1; 4219 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4220 4221 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4222 len = 0; 4223 len = ai[owners[rank+1]] - ai[owners[rank]]; 4224 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4225 current_space = free_space; 4226 4227 /* determine symbolic info for each local row */ 4228 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 4229 nextrow = buf_ri_k + merge->nrecv; 4230 nextai = nextrow + merge->nrecv; 4231 for (k=0; k<merge->nrecv; k++){ 4232 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4233 nrows = *buf_ri_k[k]; 4234 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4235 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4236 } 4237 4238 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4239 len = 0; 4240 for (i=0;i<m;i++) { 4241 bnzi = 0; 4242 /* add local non-zero cols of this proc's seqmat into lnk */ 4243 arow = owners[rank] + i; 4244 anzi = ai[arow+1] - ai[arow]; 4245 aj = a->j + ai[arow]; 4246 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4247 bnzi += nlnk; 4248 /* add received col data into lnk */ 4249 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4250 if (i == *nextrow[k]) { /* i-th row */ 4251 anzi = *(nextai[k]+1) - *nextai[k]; 4252 aj = buf_rj[k] + *nextai[k]; 4253 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4254 bnzi += nlnk; 4255 nextrow[k]++; nextai[k]++; 4256 } 4257 } 4258 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4259 4260 /* if free space is not available, make more free space */ 4261 if (current_space->local_remaining<bnzi) { 4262 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4263 nspacedouble++; 4264 } 4265 /* copy data into free space, then initialize lnk */ 4266 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4267 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4268 4269 current_space->array += bnzi; 4270 current_space->local_used += bnzi; 4271 current_space->local_remaining -= bnzi; 4272 4273 bi[i+1] = bi[i] + bnzi; 4274 } 4275 4276 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 4277 4278 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4279 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4280 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4281 4282 /* create symbolic parallel matrix B_mpi */ 4283 /*---------------------------------------*/ 4284 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4285 if (n==PETSC_DECIDE) { 4286 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4287 } else { 4288 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4289 } 4290 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4291 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4292 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4293 4294 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4295 B_mpi->assembled = PETSC_FALSE; 4296 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4297 merge->bi = bi; 4298 merge->bj = bj; 4299 merge->buf_ri = buf_ri; 4300 merge->buf_rj = 
buf_rj; 4301 merge->coi = PETSC_NULL; 4302 merge->coj = PETSC_NULL; 4303 merge->owners_co = PETSC_NULL; 4304 4305 /* attach the supporting struct to B_mpi for reuse */ 4306 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4307 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4308 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4309 *mpimat = B_mpi; 4310 4311 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4312 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4313 PetscFunctionReturn(0); 4314 } 4315 4316 #undef __FUNCT__ 4317 #define __FUNCT__ "MatMerge_SeqsToMPI" 4318 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4319 { 4320 PetscErrorCode ierr; 4321 4322 PetscFunctionBegin; 4323 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4324 if (scall == MAT_INITIAL_MATRIX){ 4325 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4326 } 4327 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4328 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4329 PetscFunctionReturn(0); 4330 } 4331 4332 #undef __FUNCT__ 4333 #define __FUNCT__ "MatGetLocalMat" 4334 /*@ 4335 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4336 4337 Not Collective 4338 4339 Input Parameters: 4340 + A - the matrix 4341 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4342 4343 Output Parameter: 4344 . A_loc - the local sequential matrix generated 4345 4346 Level: developer 4347 4348 @*/ 4349 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4350 { 4351 PetscErrorCode ierr; 4352 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4353 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4354 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4355 MatScalar *aa=a->a,*ba=b->a,*cam; 4356 PetscScalar *ca; 4357 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4358 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4359 4360 PetscFunctionBegin; 4361 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4362 if (scall == MAT_INITIAL_MATRIX){ 4363 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4364 ci[0] = 0; 4365 for (i=0; i<am; i++){ 4366 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4367 } 4368 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4369 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4370 k = 0; 4371 for (i=0; i<am; i++) { 4372 ncols_o = bi[i+1] - bi[i]; 4373 ncols_d = ai[i+1] - ai[i]; 4374 /* off-diagonal portion of A */ 4375 for (jo=0; jo<ncols_o; jo++) { 4376 col = cmap[*bj]; 4377 if (col >= cstart) break; 4378 cj[k] = col; bj++; 4379 ca[k++] = *ba++; 4380 } 4381 /* diagonal portion of A */ 4382 for (j=0; j<ncols_d; j++) { 4383 cj[k] = cstart + *aj++; 4384 ca[k++] = *aa++; 4385 } 4386 /* off-diagonal portion of A */ 4387 for (j=jo; j<ncols_o; j++) { 4388 cj[k] = cmap[*bj++]; 4389 ca[k++] = *ba++; 4390 } 4391 } 4392 /* put together the new matrix */ 4393 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4394 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4395 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 4396 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4397 mat->free_a = PETSC_TRUE; 4398 mat->free_ij = PETSC_TRUE; 4399 mat->nonew = 0; 4400 } else if (scall == MAT_REUSE_MATRIX){ 4401 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4402 ci = mat->i; cj = mat->j; cam = mat->a; 4403 for (i=0; i<am; i++) { 4404 /* off-diagonal portion of A */ 4405 ncols_o = bi[i+1] - bi[i]; 4406 for (jo=0; jo<ncols_o; jo++) { 4407 col = cmap[*bj]; 4408 if (col >= cstart) break; 4409 *cam++ = *ba++; bj++; 4410 } 4411 /* diagonal portion of A */ 4412 ncols_d = ai[i+1] - ai[i]; 4413 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4414 /* off-diagonal portion of A */ 4415 for (j=jo; j<ncols_o; j++) { 4416 *cam++ = *ba++; bj++; 4417 } 4418 } 4419 } else { 4420 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4421 } 4422 4423 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4424 PetscFunctionReturn(0); 4425 } 4426 4427 #undef __FUNCT__ 4428 #define __FUNCT__ "MatGetLocalMatCondensed" 4429 /*@C 4430 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4431 4432 Not Collective 4433 4434 Input Parameters: 4435 + A - the matrix 4436 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4437 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4438 4439 Output Parameter: 4440 . A_loc - the local sequential matrix generated 4441 4442 Level: developer 4443 4444 @*/ 4445 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4446 { 4447 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4448 PetscErrorCode ierr; 4449 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4450 IS isrowa,iscola; 4451 Mat *aloc; 4452 4453 PetscFunctionBegin; 4454 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4455 if (!row){ 4456 start = A->rmap->rstart; end = A->rmap->rend; 4457 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4458 } else { 4459 isrowa = *row; 4460 } 4461 if (!col){ 4462 start = A->cmap->rstart; 4463 cmap = a->garray; 4464 nzA = a->A->cmap->n; 4465 nzB = a->B->cmap->n; 4466 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4467 ncols = 0; 4468 for (i=0; i<nzB; i++) { 4469 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4470 else break; 4471 } 4472 imark = i; 4473 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4474 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4475 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4476 ierr = PetscFree(idx);CHKERRQ(ierr); 4477 } else { 4478 iscola = *col; 4479 } 4480 if (scall != MAT_INITIAL_MATRIX){ 4481 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4482 aloc[0] = *A_loc; 4483 } 4484 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4485 *A_loc = aloc[0]; 4486 ierr = PetscFree(aloc);CHKERRQ(ierr); 4487 if (!row){ 4488 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4489 } 4490 if (!col){ 4491 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4492 } 4493 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4494 PetscFunctionReturn(0); 4495 } 4496 4497 #undef __FUNCT__ 4498 #define __FUNCT__ "MatGetBrowsOfAcols" 4499 /*@C 4500 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4501 4502 Collective on Mat 4503 4504 Input Parameters: 4505 + A,B - the matrices in mpiaij format 4506 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4507 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4508 4509 Output Parameter: 4510 + rowb, colb - index sets of rows and columns of B to extract 4511 . brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows 4512 - B_seq - the sequential matrix generated 4513 4514 Level: developer 4515 4516 @*/ 4517 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4518 { 4519 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4520 PetscErrorCode ierr; 4521 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4522 IS isrowb,iscolb; 4523 Mat *bseq; 4524 4525 PetscFunctionBegin; 4526 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4527 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4528 } 4529 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4530 4531 if (scall == MAT_INITIAL_MATRIX){ 4532 start = A->cmap->rstart; 4533 cmap = a->garray; 4534 nzA = a->A->cmap->n; 4535 nzB = a->B->cmap->n; 4536 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4537 ncols = 0; 4538 for (i=0; i<nzB; i++) { /* row < local row index */ 4539 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4540 else break; 4541 } 4542 imark = i; 4543 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4544 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4545 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4546 ierr = PetscFree(idx);CHKERRQ(ierr); 4547 *brstart = imark; 4548 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4549 } else { 4550 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4551 isrowb = *rowb; iscolb = *colb; 4552 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4553 bseq[0] = *B_seq; 4554 } 4555 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4556 *B_seq = bseq[0]; 4557 ierr = PetscFree(bseq);CHKERRQ(ierr); 4558 if (!rowb){ 4559 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4560 } else { 4561 *rowb = isrowb; 4562 } 4563 if (!colb){ 4564 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4565 } else { 4566 *colb = iscolb; 4567 } 4568 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4569 PetscFunctionReturn(0); 4570 } 4571 4572 #undef __FUNCT__ 4573 #define __FUNCT__ "MatGetBrowsOfAoCols" 4574 /*@C 4575 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4576 of the OFF-DIAGONAL portion of local A 4577 4578 Collective on Mat 4579 4580 Input Parameters: 4581 + A,B - the matrices in mpiaij format 4582 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4583 . 
startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4584 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4585 4586 Output Parameter: 4587 + B_oth - the sequential matrix generated 4588 4589 Level: developer 4590 4591 @*/ 4592 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth) 4593 { 4594 VecScatter_MPI_General *gen_to,*gen_from; 4595 PetscErrorCode ierr; 4596 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4597 Mat_SeqAIJ *b_oth; 4598 VecScatter ctx=a->Mvctx; 4599 MPI_Comm comm=((PetscObject)ctx)->comm; 4600 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4601 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4602 PetscScalar *rvalues,*svalues; 4603 MatScalar *b_otha,*bufa,*bufA; 4604 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4605 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4606 MPI_Status *sstatus,rstatus; 4607 PetscMPIInt jj; 4608 PetscInt *cols,sbs,rbs; 4609 PetscScalar *vals; 4610 4611 PetscFunctionBegin; 4612 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4613 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4614 } 4615 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4616 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4617 4618 gen_to = (VecScatter_MPI_General*)ctx->todata; 4619 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4620 rvalues = gen_from->values; /* holds the length of receiving row */ 4621 svalues = gen_to->values; /* holds the length of sending row */ 4622 nrecvs = gen_from->n; 4623 nsends = gen_to->n; 4624 4625 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4626 srow = gen_to->indices; /* local row index to be sent */ 4627 sstarts = gen_to->starts; 4628 sprocs = gen_to->procs; 4629 sstatus = gen_to->sstatus; 4630 sbs = gen_to->bs; 4631 rstarts = gen_from->starts; 4632 rprocs = gen_from->procs; 4633 rbs = gen_from->bs; 4634 4635 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4636 if (scall == MAT_INITIAL_MATRIX){ 4637 /* i-array */ 4638 /*---------*/ 4639 /* post receives */ 4640 for (i=0; i<nrecvs; i++){ 4641 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4642 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4643 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4644 } 4645 4646 /* pack the outgoing message */ 4647 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 4648 rstartsj = sstartsj + nsends +1; 4649 sstartsj[0] = 0; rstartsj[0] = 0; 4650 len = 0; /* total length of j or a array to be sent */ 4651 k = 0; 4652 for (i=0; i<nsends; i++){ 4653 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4654 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4655 for (j=0; j<nrows; j++) { 4656 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4657 for (l=0; l<sbs; l++){ 4658 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4659 rowlen[j*sbs+l] = ncols; 4660 len += ncols; 4661 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4662 } 4663 k++; 4664 } 4665 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4666 
      sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
    b_othi[0] = 0;
    len = 0; /* total length of j or a array to be received */
    k = 0;
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++){
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++){
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++){
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++){
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX){
    sstartsj = *startsj;
    rstartsj = sstartsj + nsends + 1;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Invalid MatReuse value; must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
  }

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++){
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++){
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++){
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++){
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj || !bufa_ptr){
      ierr = PetscFree(sstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
    } else {
      *startsj  = sstartsj;
      *bufa_ptr = bufa;
    }
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
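
/*
   Illustrative usage sketch for MatGetBrowsOfAoCols() (not part of the original source).
   The startsj index array and bufa value buffer produced on the first call are kept by the
   caller and handed back for MAT_REUSE_MATRIX so that only the numerical values are
   recommunicated; all three objects are released by the caller at the end.

     PetscInt  *startsj = PETSC_NULL;
     MatScalar *bufa    = PETSC_NULL;
     Mat       B_oth;

     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
     ...   use B_oth: the rows of B matching the off-diagonal columns of the local A   ...
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = PetscFree(startsj);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(B_oth);CHKERRQ(ierr);
*/
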
#undef __FUNCT__
#define __FUNCT__ "MatGetCommunicationStructs"
/*@C
  MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

  Not Collective

  Input Parameter:
. A - The matrix in mpiaij format

  Output Parameters:
+ lvec - The local vector holding off-process values from the argument to a matrix-vector product
. colmap - A map from global column index to local index into lvec
- multScatter - A scatter from the argument of a matrix-vector product to lvec

  Level: developer

@*/
#if defined (PETSC_USE_CTABLE)
PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ *) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
EXTERN_C_END

#include "src/mat/impls/dense/mpi/mpidense.h"

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
/*
    Computes C = A*B as (B'*A')', since computing the product directly with a parallel dense A is untenable

               n                       p                          p
        (              )       (              )         (                  )
      m (      A       )  *  n (       B      )   =   m (         C        )
        (              )       (              )         (                  )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(At);CHKERRQ(ierr);
  ierr = MatDestroy(Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
  ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C = Cmat;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
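
/*
   Illustrative sketch (not part of the original source): the three routines above are normally
   reached through MatMatMult() when the left operand is MPIDENSE and the right operand is MPIAIJ;
   the names A_dense and B_aij below are placeholders for such assembled matrices.

     Mat C;
     ierr = MatMatMult(A_dense,B_aij,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ...   after the operands' values change, the product can be recomputed in place   ...
     ierr = MatMatMult(A_dense,B_aij,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/
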
EXTERN_C_BEGIN
#if defined(PETSC_HAVE_MUMPS)
extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SPOOLES)
extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
#endif
EXTERN_C_END

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJ()
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);

  ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->rmap->bs  = 1;
  B->assembled = PETSC_FALSE;
  B->mapping   = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_spooles_C",
                                     "MatGetFactor_mpiaij_spooles",
                                     MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_mumps_C",
                                     "MatGetFactor_mpiaij_mumps",
                                     MatGetFactor_mpiaij_mumps);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_superlu_dist_C",
                                     "MatGetFactor_mpiaij_superlu_dist",
                                     MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIAIJ",
                                     MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIAIJ",
                                     MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIAIJ",
                                     MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                     "MatIsTranspose_MPIAIJ",
                                     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                     "MatMPIAIJSetPreallocation_MPIAIJ",
                                     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
                                     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
                                     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                     "MatDiagonalScaleLocal_MPIAIJ",
                                     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
                                     "MatConvert_MPIAIJ_MPICSRPERM",
                                     MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
                                     "MatConvert_MPIAIJ_MPICRL",
                                     MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
                                     "MatMatMult_MPIDense_MPIAIJ",
                                     MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
                                     "MatMatMultSymbolic_MPIDense_MPIAIJ",
                                     MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
                                     "MatMatMultNumeric_MPIDense_MPIAIJ",
                                     MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
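
/*
   Illustrative sketch (not part of the original source): the constructor above is invoked when a
   user selects the MATMPIAIJ type; the preallocation counts (5 diagonal and 2 off-diagonal
   nonzeros per row) are placeholder values.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
     ...   MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd()   ...
     ierr = MatDestroy(A);CHKERRQ(ierr);
*/
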
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@
     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
       and "off-diagonal" parts of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+ comm - MPI communicator
. m - number of local rows (cannot be PETSC_DECIDE)
. n - this value should be the same as the local size used in creating the
      x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
      calculated if N is given). For square matrices n is almost always m.
. M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
. N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
. i - row indices for the "diagonal" portion of the matrix
. j - column indices
. a - matrix values
. oi - row indices for the "off-diagonal" portion of the matrix
. oj - column indices
- oa - matrix values

   Output Parameter:
. mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.

       The i and j indices are 0-based.

       See MatCreateMPIAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
                                                                 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (oi[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  }
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;
  maij->donotstash     = PETSC_TRUE;
  (*mat)->preallocated = PETSC_TRUE;

  (*mat)->rmap->bs = (*mat)->cmap->bs = 1;
  ierr = PetscMapSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscMapSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
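
/*
   Illustrative usage sketch for MatCreateMPIAIJWithSplitArrays() (not part of the original source).
   Each process supplies its "diagonal" block (i,j,a) and "off-diagonal" block (oi,oj,oa) in 0-based
   CSR form; because the arrays are referenced rather than copied, they must outlive the matrix.

     Mat A;
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                           i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
     ...   use A   ...
     ierr = MatDestroy(A);CHKERRQ(ierr);
*/
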
/*
    Special version for direct calls from Fortran
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
#undef SETERRQ2
#define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
#undef SETERRQ
#define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatPreallocated(mat);
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  }
#endif
  {
  PetscInt   i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscTruth roworiented = aij->roworiented;

  /* Some variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt   *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt   nonew = a->nonew;
  MatScalar  *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }}
  PetscFunctionReturnVoid();
}
EXTERN_C_END