1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 4 #include "../src/inline/spops.h" 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "MatDistribute_MPIAIJ" 8 /* 9 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 10 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 11 12 Only for square matrices 13 */ 14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 15 { 16 PetscMPIInt rank,size; 17 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 18 PetscErrorCode ierr; 19 Mat mat; 20 Mat_SeqAIJ *gmata; 21 PetscMPIInt tag; 22 MPI_Status status; 23 PetscTruth aij; 24 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 25 26 PetscFunctionBegin; 27 CHKMEMQ; 28 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 29 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 30 if (!rank) { 31 ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 32 if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 33 } 34 if (reuse == MAT_INITIAL_MATRIX) { 35 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 36 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 37 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 38 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 39 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 40 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 41 rowners[0] = 0; 42 for (i=2; i<=size; i++) { 43 rowners[i] += rowners[i-1]; 44 } 45 rstart = rowners[rank]; 46 rend = rowners[rank+1]; 47 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 48 if (!rank) { 49 gmata = (Mat_SeqAIJ*) gmat->data; 50 /* send row lengths to all processors */ 51 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 52 for (i=1; i<size; i++) { 53 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 54 } 55 /* determine number diagonal and off-diagonal counts */ 56 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 57 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 58 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 59 jj = 0; 60 for (i=0; i<m; i++) { 61 for (j=0; j<dlens[i]; j++) { 62 if (gmata->j[jj] < rstart) ld[i]++; 63 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 64 jj++; 65 } 66 } 67 /* send column indices to other processes */ 68 for (i=1; i<size; i++) { 69 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 70 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 71 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 72 } 73 74 /* send numerical values to other processes */ 75 for (i=1; i<size; i++) { 76 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 77 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 78 } 79 gmataa = gmata->a; 80 gmataj = gmata->j; 81 82 } else { 83 /* receive row lengths */ 84 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 85 /* receive column indices */ 86 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 87 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 88 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 89 /* determine number diagonal and off-diagonal counts */ 90 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 91 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 92 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 93 jj = 0; 94 for (i=0; i<m; i++) { 95 for (j=0; j<dlens[i]; j++) { 96 if (gmataj[jj] < rstart) ld[i]++; 97 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 98 jj++; 99 } 100 } 101 /* receive numerical values */ 102 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 104 } 105 /* set preallocation */ 106 for (i=0; i<m; i++) { 107 dlens[i] -= olens[i]; 108 } 109 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 110 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 111 112 for (i=0; i<m; i++) { 113 dlens[i] += olens[i]; 114 } 115 cnt = 0; 116 for (i=0; i<m; i++) { 117 row = rstart + i; 118 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 119 cnt += dlens[i]; 120 } 121 if (rank) { 122 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 123 } 124 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 125 ierr = PetscFree(rowners);CHKERRQ(ierr); 126 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 127 *inmat = mat; 128 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 129 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 130 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 131 mat = *inmat; 132 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 133 if (!rank) { 134 /* send numerical values to other processes */ 135 gmata = (Mat_SeqAIJ*) gmat->data; 136 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 137 gmataa = gmata->a; 138 for (i=1; i<size; i++) { 139 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 140 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 141 } 142 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 143 } else { 144 /* receive numerical values from process 0*/ 145 nz = Ad->nz + Ao->nz; 146 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 147 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 148 } 149 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 150 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 151 ad = Ad->a; 152 ao = Ao->a; 153 if (mat->rmap->n) { 154 i = 0; 155 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 156 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 157 } 158 for (i=1; i<mat->rmap->n; i++) { 159 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 160 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 161 } 162 i--; 163 if (mat->rmap->n) { 164 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 165 } 166 if (rank) { 167 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 168 } 169 } 170 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 171 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 172 CHKMEMQ; 173 PetscFunctionReturn(0); 174 } 175 176 /* 177 Local utility routine that creates a mapping from the global column 178 number to the local number in the off-diagonal part of the local 179 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 180 a slightly higher hash table cost; without it it is not scalable (each processor 181 has an order N integer array but is fast to acess. 182 */ 183 #undef __FUNCT__ 184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 186 { 187 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 188 PetscErrorCode ierr; 189 PetscInt n = aij->B->cmap->n,i; 190 191 PetscFunctionBegin; 192 #if defined (PETSC_USE_CTABLE) 193 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 194 for (i=0; i<n; i++){ 195 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 196 } 197 #else 198 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 199 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 200 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 201 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 202 #endif 203 PetscFunctionReturn(0); 204 } 205 206 207 #define CHUNKSIZE 15 208 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 209 { \ 210 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 211 lastcol1 = col;\ 212 while (high1-low1 > 5) { \ 213 t = (low1+high1)/2; \ 214 if (rp1[t] > col) high1 = t; \ 215 else low1 = t; \ 216 } \ 217 for (_i=low1; _i<high1; _i++) { \ 218 if (rp1[_i] > col) break; \ 219 if (rp1[_i] == col) { \ 220 if (addv == ADD_VALUES) ap1[_i] += value; \ 221 else ap1[_i] = value; \ 222 goto a_noinsert; \ 223 } \ 224 } \ 225 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 226 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 227 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 228 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 229 N = nrow1++ - 1; a->nz++; high1++; \ 230 /* shift up all the later entries in this row */ \ 231 for (ii=N; ii>=_i; ii--) { \ 232 rp1[ii+1] = rp1[ii]; \ 233 ap1[ii+1] = ap1[ii]; \ 234 } \ 235 rp1[_i] = col; \ 236 ap1[_i] = value; \ 237 a_noinsert: ; \ 238 ailen[row] = nrow1; \ 239 } 240 241 242 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 243 { \ 244 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 245 lastcol2 = col;\ 246 while (high2-low2 > 5) { \ 247 t = (low2+high2)/2; \ 248 if (rp2[t] > col) high2 = t; \ 249 else low2 = t; \ 250 } \ 251 for (_i=low2; _i<high2; _i++) { \ 252 if (rp2[_i] > col) break; \ 253 if (rp2[_i] == col) { \ 254 if (addv == ADD_VALUES) ap2[_i] += value; \ 255 else ap2[_i] = value; \ 256 goto b_noinsert; \ 257 } \ 258 } \ 259 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 260 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 261 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 262 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 263 N = nrow2++ - 1; b->nz++; high2++; \ 264 /* shift up all the later entries in this row */ \ 265 for (ii=N; ii>=_i; ii--) { \ 266 rp2[ii+1] = rp2[ii]; \ 267 ap2[ii+1] = ap2[ii]; \ 268 } \ 269 rp2[_i] = col; \ 270 ap2[_i] = value; \ 271 b_noinsert: ; \ 272 bilen[row] = nrow2; \ 273 } 274 275 #undef __FUNCT__ 276 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 277 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 278 { 279 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 280 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 281 PetscErrorCode ierr; 282 PetscInt l,*garray = mat->garray,diag; 283 284 PetscFunctionBegin; 285 /* code only works for square matrices A */ 286 287 /* find size of row to the left of the diagonal part */ 288 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 289 row = row - diag; 290 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 291 if (garray[b->j[b->i[row]+l]] > diag) break; 292 } 293 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 294 295 /* diagonal part */ 296 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 297 298 /* right of diagonal part */ 299 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 300 PetscFunctionReturn(0); 301 } 302 303 #undef __FUNCT__ 304 #define __FUNCT__ "MatSetValues_MPIAIJ" 305 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 306 { 307 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 308 PetscScalar value; 309 PetscErrorCode ierr; 310 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 311 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 312 PetscTruth roworiented = aij->roworiented; 313 314 /* Some Variables required in the macro */ 315 Mat A = aij->A; 316 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 317 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 318 MatScalar *aa = a->a; 319 PetscTruth ignorezeroentries = a->ignorezeroentries; 320 Mat B = aij->B; 321 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 322 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 323 MatScalar *ba = b->a; 324 325 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 326 PetscInt nonew = a->nonew; 327 MatScalar *ap1,*ap2; 328 329 PetscFunctionBegin; 330 for (i=0; i<m; i++) { 331 if (im[i] < 0) continue; 332 #if defined(PETSC_USE_DEBUG) 333 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 334 #endif 335 if (im[i] >= rstart && im[i] < rend) { 336 row = im[i] - rstart; 337 lastcol1 = -1; 338 rp1 = aj + ai[row]; 339 ap1 = aa + ai[row]; 340 rmax1 = aimax[row]; 341 nrow1 = ailen[row]; 342 low1 = 0; 343 high1 = nrow1; 344 lastcol2 = -1; 345 rp2 = bj + bi[row]; 346 ap2 = ba + bi[row]; 347 rmax2 = bimax[row]; 348 nrow2 = bilen[row]; 349 low2 = 0; 350 high2 = nrow2; 351 352 for (j=0; j<n; j++) { 353 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 354 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 355 if (in[j] >= cstart && in[j] < cend){ 356 col = in[j] - cstart; 357 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 358 } else if (in[j] < 0) continue; 359 #if defined(PETSC_USE_DEBUG) 360 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 361 #endif 362 else { 363 if (mat->was_assembled) { 364 if (!aij->colmap) { 365 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 366 } 367 #if defined (PETSC_USE_CTABLE) 368 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 369 col--; 370 #else 371 col = aij->colmap[in[j]] - 1; 372 #endif 373 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 374 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 375 col = in[j]; 376 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 377 B = aij->B; 378 b = (Mat_SeqAIJ*)B->data; 379 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 380 rp2 = bj + bi[row]; 381 ap2 = ba + bi[row]; 382 rmax2 = bimax[row]; 383 nrow2 = bilen[row]; 384 low2 = 0; 385 high2 = nrow2; 386 bm = aij->B->rmap->n; 387 ba = b->a; 388 } 389 } else col = in[j]; 390 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 391 } 392 } 393 } else { 394 if (!aij->donotstash) { 395 if (roworiented) { 396 if (ignorezeroentries && v[i*n] == 0.0) continue; 397 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 398 } else { 399 if (ignorezeroentries && v[i] == 0.0) continue; 400 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 401 } 402 } 403 } 404 } 405 PetscFunctionReturn(0); 406 } 407 408 #undef __FUNCT__ 409 #define __FUNCT__ "MatGetValues_MPIAIJ" 410 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 411 { 412 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 413 PetscErrorCode ierr; 414 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 415 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 416 417 PetscFunctionBegin; 418 for (i=0; i<m; i++) { 419 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 420 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 421 if (idxm[i] >= rstart && idxm[i] < rend) { 422 row = idxm[i] - rstart; 423 for (j=0; j<n; j++) { 424 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 425 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 426 if (idxn[j] >= cstart && idxn[j] < cend){ 427 col = idxn[j] - cstart; 428 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 429 } else { 430 if (!aij->colmap) { 431 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 432 } 433 #if defined (PETSC_USE_CTABLE) 434 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 435 col --; 436 #else 437 col = aij->colmap[idxn[j]] - 1; 438 #endif 439 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 440 else { 441 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 442 } 443 } 444 } 445 } else { 446 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 447 } 448 } 449 PetscFunctionReturn(0); 450 } 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 455 { 456 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 457 PetscErrorCode ierr; 458 PetscInt nstash,reallocs; 459 InsertMode addv; 460 461 PetscFunctionBegin; 462 if (aij->donotstash) { 463 PetscFunctionReturn(0); 464 } 465 466 /* make sure all processors are either in INSERTMODE or ADDMODE */ 467 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 468 if (addv == (ADD_VALUES|INSERT_VALUES)) { 469 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 470 } 471 mat->insertmode = addv; /* in case this processor had no cache */ 472 473 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 474 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 475 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 476 PetscFunctionReturn(0); 477 } 478 479 #undef __FUNCT__ 480 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 481 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 482 { 483 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 484 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 485 PetscErrorCode ierr; 486 PetscMPIInt n; 487 PetscInt i,j,rstart,ncols,flg; 488 PetscInt *row,*col; 489 PetscTruth other_disassembled; 490 PetscScalar *val; 491 InsertMode addv = mat->insertmode; 492 493 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 494 PetscFunctionBegin; 495 if (!aij->donotstash) { 496 while (1) { 497 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 498 if (!flg) break; 499 500 for (i=0; i<n;) { 501 /* Now identify the consecutive vals belonging to the same row */ 502 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 503 if (j < n) ncols = j-i; 504 else ncols = n-i; 505 /* Now assemble all these values with a single function call */ 506 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 507 i = j; 508 } 509 } 510 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 511 } 512 a->compressedrow.use = PETSC_FALSE; 513 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 514 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 515 516 /* determine if any processor has disassembled, if so we must 517 also disassemble ourselfs, in order that we may reassemble. */ 518 /* 519 if nonzero structure of submatrix B cannot change then we know that 520 no processor disassembled thus we can skip this stuff 521 */ 522 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 523 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 524 if (mat->was_assembled && !other_disassembled) { 525 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 526 } 527 } 528 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 529 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 530 } 531 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 532 ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 533 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 534 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 535 536 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 537 aij->rowvalues = 0; 538 539 /* used by MatAXPY() */ 540 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 541 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 542 543 PetscFunctionReturn(0); 544 } 545 546 #undef __FUNCT__ 547 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 548 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 549 { 550 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 551 PetscErrorCode ierr; 552 553 PetscFunctionBegin; 554 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 555 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 556 PetscFunctionReturn(0); 557 } 558 559 #undef __FUNCT__ 560 #define __FUNCT__ "MatZeroRows_MPIAIJ" 561 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 562 { 563 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 564 PetscErrorCode ierr; 565 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 566 PetscInt i,*owners = A->rmap->range; 567 PetscInt *nprocs,j,idx,nsends,row; 568 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 569 PetscInt *rvalues,count,base,slen,*source; 570 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 571 MPI_Comm comm = ((PetscObject)A)->comm; 572 MPI_Request *send_waits,*recv_waits; 573 MPI_Status recv_status,*send_status; 574 #if defined(PETSC_DEBUG) 575 PetscTruth found = PETSC_FALSE; 576 #endif 577 578 PetscFunctionBegin; 579 /* first count number of contributors to each processor */ 580 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 581 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 582 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 583 j = 0; 584 for (i=0; i<N; i++) { 585 if (lastidx > (idx = rows[i])) j = 0; 586 lastidx = idx; 587 for (; j<size; j++) { 588 if (idx >= owners[j] && idx < owners[j+1]) { 589 nprocs[2*j]++; 590 nprocs[2*j+1] = 1; 591 owner[i] = j; 592 #if defined(PETSC_DEBUG) 593 found = PETSC_TRUE; 594 #endif 595 break; 596 } 597 } 598 #if defined(PETSC_DEBUG) 599 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 600 found = PETSC_FALSE; 601 #endif 602 } 603 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 604 605 /* inform other processors of number of messages and max length*/ 606 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 607 608 /* post receives: */ 609 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 610 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 611 for (i=0; i<nrecvs; i++) { 612 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 613 } 614 615 /* do sends: 616 1) starts[i] gives the starting index in svalues for stuff going to 617 the ith processor 618 */ 619 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 620 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 621 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 622 starts[0] = 0; 623 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 624 for (i=0; i<N; i++) { 625 svalues[starts[owner[i]]++] = rows[i]; 626 } 627 628 starts[0] = 0; 629 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 630 count = 0; 631 for (i=0; i<size; i++) { 632 if (nprocs[2*i+1]) { 633 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 634 } 635 } 636 ierr = PetscFree(starts);CHKERRQ(ierr); 637 638 base = owners[rank]; 639 640 /* wait on receives */ 641 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 642 source = lens + nrecvs; 643 count = nrecvs; slen = 0; 644 while (count) { 645 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 646 /* unpack receives into our local space */ 647 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 648 source[imdex] = recv_status.MPI_SOURCE; 649 lens[imdex] = n; 650 slen += n; 651 count--; 652 } 653 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 654 655 /* move the data into the send scatter */ 656 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 657 count = 0; 658 for (i=0; i<nrecvs; i++) { 659 values = rvalues + i*nmax; 660 for (j=0; j<lens[i]; j++) { 661 lrows[count++] = values[j] - base; 662 } 663 } 664 ierr = PetscFree(rvalues);CHKERRQ(ierr); 665 ierr = PetscFree(lens);CHKERRQ(ierr); 666 ierr = PetscFree(owner);CHKERRQ(ierr); 667 ierr = PetscFree(nprocs);CHKERRQ(ierr); 668 669 /* actually zap the local rows */ 670 /* 671 Zero the required rows. If the "diagonal block" of the matrix 672 is square and the user wishes to set the diagonal we use separate 673 code so that MatSetValues() is not called for each diagonal allocating 674 new memory, thus calling lots of mallocs and slowing things down. 675 676 Contributed by: Matthew Knepley 677 */ 678 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 679 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 680 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 681 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 682 } else if (diag != 0.0) { 683 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 684 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 685 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 686 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 687 } 688 for (i = 0; i < slen; i++) { 689 row = lrows[i] + rstart; 690 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 691 } 692 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 693 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 694 } else { 695 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 696 } 697 ierr = PetscFree(lrows);CHKERRQ(ierr); 698 699 /* wait on sends */ 700 if (nsends) { 701 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 702 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 703 ierr = PetscFree(send_status);CHKERRQ(ierr); 704 } 705 ierr = PetscFree(send_waits);CHKERRQ(ierr); 706 ierr = PetscFree(svalues);CHKERRQ(ierr); 707 708 PetscFunctionReturn(0); 709 } 710 711 #undef __FUNCT__ 712 #define __FUNCT__ "MatMult_MPIAIJ" 713 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 714 { 715 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 716 PetscErrorCode ierr; 717 PetscInt nt; 718 719 PetscFunctionBegin; 720 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 721 if (nt != A->cmap->n) { 722 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 723 } 724 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 725 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 726 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 727 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 728 PetscFunctionReturn(0); 729 } 730 731 #undef __FUNCT__ 732 #define __FUNCT__ "MatMultAdd_MPIAIJ" 733 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 734 { 735 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 736 PetscErrorCode ierr; 737 738 PetscFunctionBegin; 739 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 740 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 741 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 742 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 743 PetscFunctionReturn(0); 744 } 745 746 #undef __FUNCT__ 747 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 748 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 749 { 750 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 751 PetscErrorCode ierr; 752 PetscTruth merged; 753 754 PetscFunctionBegin; 755 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 756 /* do nondiagonal part */ 757 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 758 if (!merged) { 759 /* send it on its way */ 760 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 761 /* do local part */ 762 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 763 /* receive remote parts: note this assumes the values are not actually */ 764 /* added in yy until the next line, */ 765 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 766 } else { 767 /* do local part */ 768 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 769 /* send it on its way */ 770 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 771 /* values actually were received in the Begin() but we need to call this nop */ 772 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 773 } 774 PetscFunctionReturn(0); 775 } 776 777 EXTERN_C_BEGIN 778 #undef __FUNCT__ 779 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 780 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 781 { 782 MPI_Comm comm; 783 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 784 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 785 IS Me,Notme; 786 PetscErrorCode ierr; 787 PetscInt M,N,first,last,*notme,i; 788 PetscMPIInt size; 789 790 PetscFunctionBegin; 791 792 /* Easy test: symmetric diagonal block */ 793 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 794 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 795 if (!*f) PetscFunctionReturn(0); 796 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 797 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 798 if (size == 1) PetscFunctionReturn(0); 799 800 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 801 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 802 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 803 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 804 for (i=0; i<first; i++) notme[i] = i; 805 for (i=last; i<M; i++) notme[i-last+first] = i; 806 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 807 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 808 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 809 Aoff = Aoffs[0]; 810 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 811 Boff = Boffs[0]; 812 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 813 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 814 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 815 ierr = ISDestroy(Me);CHKERRQ(ierr); 816 ierr = ISDestroy(Notme);CHKERRQ(ierr); 817 818 PetscFunctionReturn(0); 819 } 820 EXTERN_C_END 821 822 #undef __FUNCT__ 823 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 824 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 825 { 826 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 827 PetscErrorCode ierr; 828 829 PetscFunctionBegin; 830 /* do nondiagonal part */ 831 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 832 /* send it on its way */ 833 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 834 /* do local part */ 835 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 836 /* receive remote parts */ 837 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 838 PetscFunctionReturn(0); 839 } 840 841 /* 842 This only works correctly for square matrices where the subblock A->A is the 843 diagonal block 844 */ 845 #undef __FUNCT__ 846 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 847 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 848 { 849 PetscErrorCode ierr; 850 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 851 852 PetscFunctionBegin; 853 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 854 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) { 855 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 856 } 857 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 858 PetscFunctionReturn(0); 859 } 860 861 #undef __FUNCT__ 862 #define __FUNCT__ "MatScale_MPIAIJ" 863 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 864 { 865 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 866 PetscErrorCode ierr; 867 868 PetscFunctionBegin; 869 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 870 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 871 PetscFunctionReturn(0); 872 } 873 874 #undef __FUNCT__ 875 #define __FUNCT__ "MatDestroy_MPIAIJ" 876 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 877 { 878 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 879 PetscErrorCode ierr; 880 881 PetscFunctionBegin; 882 #if defined(PETSC_USE_LOG) 883 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 884 #endif 885 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 886 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 887 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 888 #if defined (PETSC_USE_CTABLE) 889 if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);} 890 #else 891 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 892 #endif 893 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 894 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 895 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 896 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 897 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 898 ierr = PetscFree(aij);CHKERRQ(ierr); 899 900 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 901 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 902 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 903 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 904 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 905 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 906 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 907 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 908 PetscFunctionReturn(0); 909 } 910 911 #undef __FUNCT__ 912 #define __FUNCT__ "MatView_MPIAIJ_Binary" 913 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 914 { 915 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 916 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 917 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 918 PetscErrorCode ierr; 919 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 920 int fd; 921 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 922 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 923 PetscScalar *column_values; 924 925 PetscFunctionBegin; 926 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 927 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 928 nz = A->nz + B->nz; 929 if (!rank) { 930 header[0] = MAT_FILE_COOKIE; 931 header[1] = mat->rmap->N; 932 header[2] = mat->cmap->N; 933 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 934 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 935 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 936 /* get largest number of rows any processor has */ 937 rlen = mat->rmap->n; 938 range = mat->rmap->range; 939 for (i=1; i<size; i++) { 940 rlen = PetscMax(rlen,range[i+1] - range[i]); 941 } 942 } else { 943 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 944 rlen = mat->rmap->n; 945 } 946 947 /* load up the local row counts */ 948 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 949 for (i=0; i<mat->rmap->n; i++) { 950 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 951 } 952 953 /* store the row lengths to the file */ 954 if (!rank) { 955 MPI_Status status; 956 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 957 for (i=1; i<size; i++) { 958 rlen = range[i+1] - range[i]; 959 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 960 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 961 } 962 } else { 963 ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 964 } 965 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 966 967 /* load up the local column indices */ 968 nzmax = nz; /* )th processor needs space a largest processor needs */ 969 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 970 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 971 cnt = 0; 972 for (i=0; i<mat->rmap->n; i++) { 973 for (j=B->i[i]; j<B->i[i+1]; j++) { 974 if ( (col = garray[B->j[j]]) > cstart) break; 975 column_indices[cnt++] = col; 976 } 977 for (k=A->i[i]; k<A->i[i+1]; k++) { 978 column_indices[cnt++] = A->j[k] + cstart; 979 } 980 for (; j<B->i[i+1]; j++) { 981 column_indices[cnt++] = garray[B->j[j]]; 982 } 983 } 984 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 985 986 /* store the column indices to the file */ 987 if (!rank) { 988 MPI_Status status; 989 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 990 for (i=1; i<size; i++) { 991 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 992 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 993 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 994 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 995 } 996 } else { 997 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 998 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 999 } 1000 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1001 1002 /* load up the local column values */ 1003 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1004 cnt = 0; 1005 for (i=0; i<mat->rmap->n; i++) { 1006 for (j=B->i[i]; j<B->i[i+1]; j++) { 1007 if ( garray[B->j[j]] > cstart) break; 1008 column_values[cnt++] = B->a[j]; 1009 } 1010 for (k=A->i[i]; k<A->i[i+1]; k++) { 1011 column_values[cnt++] = A->a[k]; 1012 } 1013 for (; j<B->i[i+1]; j++) { 1014 column_values[cnt++] = B->a[j]; 1015 } 1016 } 1017 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1018 1019 /* store the column values to the file */ 1020 if (!rank) { 1021 MPI_Status status; 1022 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1023 for (i=1; i<size; i++) { 1024 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1025 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1026 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1027 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1028 } 1029 } else { 1030 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1031 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1032 } 1033 ierr = PetscFree(column_values);CHKERRQ(ierr); 1034 PetscFunctionReturn(0); 1035 } 1036 1037 #undef __FUNCT__ 1038 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1039 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1040 { 1041 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1042 PetscErrorCode ierr; 1043 PetscMPIInt rank = aij->rank,size = aij->size; 1044 PetscTruth isdraw,iascii,isbinary; 1045 PetscViewer sviewer; 1046 PetscViewerFormat format; 1047 1048 PetscFunctionBegin; 1049 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1050 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1051 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1052 if (iascii) { 1053 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1054 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1055 MatInfo info; 1056 PetscTruth inodes; 1057 1058 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1059 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1060 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1061 if (!inodes) { 1062 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1063 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1064 } else { 1065 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1066 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1067 } 1068 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1069 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1070 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1071 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1072 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1073 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1074 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1075 PetscFunctionReturn(0); 1076 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1077 PetscInt inodecount,inodelimit,*inodes; 1078 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1079 if (inodes) { 1080 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1081 } else { 1082 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1083 } 1084 PetscFunctionReturn(0); 1085 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1086 PetscFunctionReturn(0); 1087 } 1088 } else if (isbinary) { 1089 if (size == 1) { 1090 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1091 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1092 } else { 1093 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1094 } 1095 PetscFunctionReturn(0); 1096 } else if (isdraw) { 1097 PetscDraw draw; 1098 PetscTruth isnull; 1099 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1100 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1101 } 1102 1103 if (size == 1) { 1104 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1105 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1106 } else { 1107 /* assemble the entire matrix onto first processor. */ 1108 Mat A; 1109 Mat_SeqAIJ *Aloc; 1110 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1111 MatScalar *a; 1112 1113 if (mat->rmap->N > 1024) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 512 rows, use binary format instead"); 1114 1115 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1116 if (!rank) { 1117 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1118 } else { 1119 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1120 } 1121 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1122 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1123 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1124 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1125 1126 /* copy over the A part */ 1127 Aloc = (Mat_SeqAIJ*)aij->A->data; 1128 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1129 row = mat->rmap->rstart; 1130 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1131 for (i=0; i<m; i++) { 1132 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1133 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1134 } 1135 aj = Aloc->j; 1136 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1137 1138 /* copy over the B part */ 1139 Aloc = (Mat_SeqAIJ*)aij->B->data; 1140 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1141 row = mat->rmap->rstart; 1142 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1143 ct = cols; 1144 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1145 for (i=0; i<m; i++) { 1146 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1147 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1148 } 1149 ierr = PetscFree(ct);CHKERRQ(ierr); 1150 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1151 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1152 /* 1153 Everyone has to call to draw the matrix since the graphics waits are 1154 synchronized across all processors that share the PetscDraw object 1155 */ 1156 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1157 if (!rank) { 1158 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1159 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1160 } 1161 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1162 ierr = MatDestroy(A);CHKERRQ(ierr); 1163 } 1164 PetscFunctionReturn(0); 1165 } 1166 1167 #undef __FUNCT__ 1168 #define __FUNCT__ "MatView_MPIAIJ" 1169 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1170 { 1171 PetscErrorCode ierr; 1172 PetscTruth iascii,isdraw,issocket,isbinary; 1173 1174 PetscFunctionBegin; 1175 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1176 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1177 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1178 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1179 if (iascii || isdraw || isbinary || issocket) { 1180 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1181 } else { 1182 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1183 } 1184 PetscFunctionReturn(0); 1185 } 1186 1187 #undef __FUNCT__ 1188 #define __FUNCT__ "MatRelax_MPIAIJ" 1189 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1190 { 1191 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1192 PetscErrorCode ierr; 1193 Vec bb1; 1194 1195 PetscFunctionBegin; 1196 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1197 1198 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1199 if (flag & SOR_ZERO_INITIAL_GUESS) { 1200 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1201 its--; 1202 } 1203 1204 while (its--) { 1205 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1206 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1207 1208 /* update rhs: bb1 = bb - B*x */ 1209 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1210 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1211 1212 /* local sweep */ 1213 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 1214 } 1215 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1216 if (flag & SOR_ZERO_INITIAL_GUESS) { 1217 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1218 its--; 1219 } 1220 while (its--) { 1221 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1222 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1223 1224 /* update rhs: bb1 = bb - B*x */ 1225 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1226 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1227 1228 /* local sweep */ 1229 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1230 } 1231 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1232 if (flag & SOR_ZERO_INITIAL_GUESS) { 1233 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1234 its--; 1235 } 1236 while (its--) { 1237 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1238 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1239 1240 /* update rhs: bb1 = bb - B*x */ 1241 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1242 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1243 1244 /* local sweep */ 1245 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1246 } 1247 } else { 1248 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1249 } 1250 1251 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1252 PetscFunctionReturn(0); 1253 } 1254 1255 #undef __FUNCT__ 1256 #define __FUNCT__ "MatPermute_MPIAIJ" 1257 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1258 { 1259 MPI_Comm comm,pcomm; 1260 PetscInt first,local_size,nrows; 1261 const PetscInt *rows; 1262 int ntids; 1263 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1264 PetscErrorCode ierr; 1265 1266 PetscFunctionBegin; 1267 ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr); 1268 /* make a collective version of 'rowp' */ 1269 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr); 1270 if (pcomm==comm) { 1271 crowp = rowp; 1272 } else { 1273 ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr); 1274 ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr); 1275 ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr); 1276 ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr); 1277 } 1278 /* collect the global row permutation and invert it */ 1279 ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr); 1280 ierr = ISSetPermutation(growp); CHKERRQ(ierr); 1281 if (pcomm!=comm) { 1282 ierr = ISDestroy(crowp); CHKERRQ(ierr); 1283 } 1284 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1285 /* get the local target indices */ 1286 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr); 1287 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr); 1288 ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr); 1289 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr); 1290 ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr); 1291 ierr = ISDestroy(irowp); CHKERRQ(ierr); 1292 /* the column permutation is so much easier; 1293 make a local version of 'colp' and invert it */ 1294 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr); 1295 ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr); 1296 if (ntids==1) { 1297 lcolp = colp; 1298 } else { 1299 ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr); 1300 ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr); 1301 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr); 1302 } 1303 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr); 1304 ierr = ISSetPermutation(lcolp); CHKERRQ(ierr); 1305 if (ntids>1) { 1306 ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr); 1307 ierr = ISDestroy(lcolp); CHKERRQ(ierr); 1308 } 1309 /* now we just get the submatrix */ 1310 ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr); 1311 /* clean up */ 1312 ierr = ISDestroy(lrowp); CHKERRQ(ierr); 1313 ierr = ISDestroy(icolp); CHKERRQ(ierr); 1314 PetscFunctionReturn(0); 1315 } 1316 1317 #undef __FUNCT__ 1318 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1319 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1320 { 1321 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1322 Mat A = mat->A,B = mat->B; 1323 PetscErrorCode ierr; 1324 PetscReal isend[5],irecv[5]; 1325 1326 PetscFunctionBegin; 1327 info->block_size = 1.0; 1328 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1329 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1330 isend[3] = info->memory; isend[4] = info->mallocs; 1331 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1332 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1333 isend[3] += info->memory; isend[4] += info->mallocs; 1334 if (flag == MAT_LOCAL) { 1335 info->nz_used = isend[0]; 1336 info->nz_allocated = isend[1]; 1337 info->nz_unneeded = isend[2]; 1338 info->memory = isend[3]; 1339 info->mallocs = isend[4]; 1340 } else if (flag == MAT_GLOBAL_MAX) { 1341 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1342 info->nz_used = irecv[0]; 1343 info->nz_allocated = irecv[1]; 1344 info->nz_unneeded = irecv[2]; 1345 info->memory = irecv[3]; 1346 info->mallocs = irecv[4]; 1347 } else if (flag == MAT_GLOBAL_SUM) { 1348 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1349 info->nz_used = irecv[0]; 1350 info->nz_allocated = irecv[1]; 1351 info->nz_unneeded = irecv[2]; 1352 info->memory = irecv[3]; 1353 info->mallocs = irecv[4]; 1354 } 1355 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1356 info->fill_ratio_needed = 0; 1357 info->factor_mallocs = 0; 1358 1359 PetscFunctionReturn(0); 1360 } 1361 1362 #undef __FUNCT__ 1363 #define __FUNCT__ "MatSetOption_MPIAIJ" 1364 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg) 1365 { 1366 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1367 PetscErrorCode ierr; 1368 1369 PetscFunctionBegin; 1370 switch (op) { 1371 case MAT_NEW_NONZERO_LOCATIONS: 1372 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1373 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1374 case MAT_KEEP_ZEROED_ROWS: 1375 case MAT_NEW_NONZERO_LOCATION_ERR: 1376 case MAT_USE_INODES: 1377 case MAT_IGNORE_ZERO_ENTRIES: 1378 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1379 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1380 break; 1381 case MAT_ROW_ORIENTED: 1382 a->roworiented = flg; 1383 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1384 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1385 break; 1386 case MAT_NEW_DIAGONALS: 1387 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1388 break; 1389 case MAT_IGNORE_OFF_PROC_ENTRIES: 1390 a->donotstash = PETSC_TRUE; 1391 break; 1392 case MAT_SYMMETRIC: 1393 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1394 break; 1395 case MAT_STRUCTURALLY_SYMMETRIC: 1396 case MAT_HERMITIAN: 1397 case MAT_SYMMETRY_ETERNAL: 1398 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1399 break; 1400 default: 1401 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1402 } 1403 PetscFunctionReturn(0); 1404 } 1405 1406 #undef __FUNCT__ 1407 #define __FUNCT__ "MatGetRow_MPIAIJ" 1408 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1409 { 1410 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1411 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1412 PetscErrorCode ierr; 1413 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1414 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1415 PetscInt *cmap,*idx_p; 1416 1417 PetscFunctionBegin; 1418 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1419 mat->getrowactive = PETSC_TRUE; 1420 1421 if (!mat->rowvalues && (idx || v)) { 1422 /* 1423 allocate enough space to hold information from the longest row. 1424 */ 1425 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1426 PetscInt max = 1,tmp; 1427 for (i=0; i<matin->rmap->n; i++) { 1428 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1429 if (max < tmp) { max = tmp; } 1430 } 1431 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1432 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1433 } 1434 1435 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1436 lrow = row - rstart; 1437 1438 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1439 if (!v) {pvA = 0; pvB = 0;} 1440 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1441 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1442 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1443 nztot = nzA + nzB; 1444 1445 cmap = mat->garray; 1446 if (v || idx) { 1447 if (nztot) { 1448 /* Sort by increasing column numbers, assuming A and B already sorted */ 1449 PetscInt imark = -1; 1450 if (v) { 1451 *v = v_p = mat->rowvalues; 1452 for (i=0; i<nzB; i++) { 1453 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1454 else break; 1455 } 1456 imark = i; 1457 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1458 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1459 } 1460 if (idx) { 1461 *idx = idx_p = mat->rowindices; 1462 if (imark > -1) { 1463 for (i=0; i<imark; i++) { 1464 idx_p[i] = cmap[cworkB[i]]; 1465 } 1466 } else { 1467 for (i=0; i<nzB; i++) { 1468 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1469 else break; 1470 } 1471 imark = i; 1472 } 1473 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1474 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1475 } 1476 } else { 1477 if (idx) *idx = 0; 1478 if (v) *v = 0; 1479 } 1480 } 1481 *nz = nztot; 1482 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1483 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1484 PetscFunctionReturn(0); 1485 } 1486 1487 #undef __FUNCT__ 1488 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1489 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1490 { 1491 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1492 1493 PetscFunctionBegin; 1494 if (!aij->getrowactive) { 1495 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1496 } 1497 aij->getrowactive = PETSC_FALSE; 1498 PetscFunctionReturn(0); 1499 } 1500 1501 #undef __FUNCT__ 1502 #define __FUNCT__ "MatNorm_MPIAIJ" 1503 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1504 { 1505 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1506 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1507 PetscErrorCode ierr; 1508 PetscInt i,j,cstart = mat->cmap->rstart; 1509 PetscReal sum = 0.0; 1510 MatScalar *v; 1511 1512 PetscFunctionBegin; 1513 if (aij->size == 1) { 1514 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1515 } else { 1516 if (type == NORM_FROBENIUS) { 1517 v = amat->a; 1518 for (i=0; i<amat->nz; i++) { 1519 #if defined(PETSC_USE_COMPLEX) 1520 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1521 #else 1522 sum += (*v)*(*v); v++; 1523 #endif 1524 } 1525 v = bmat->a; 1526 for (i=0; i<bmat->nz; i++) { 1527 #if defined(PETSC_USE_COMPLEX) 1528 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1529 #else 1530 sum += (*v)*(*v); v++; 1531 #endif 1532 } 1533 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1534 *norm = sqrt(*norm); 1535 } else if (type == NORM_1) { /* max column norm */ 1536 PetscReal *tmp,*tmp2; 1537 PetscInt *jj,*garray = aij->garray; 1538 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1539 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1540 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1541 *norm = 0.0; 1542 v = amat->a; jj = amat->j; 1543 for (j=0; j<amat->nz; j++) { 1544 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1545 } 1546 v = bmat->a; jj = bmat->j; 1547 for (j=0; j<bmat->nz; j++) { 1548 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1549 } 1550 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1551 for (j=0; j<mat->cmap->N; j++) { 1552 if (tmp2[j] > *norm) *norm = tmp2[j]; 1553 } 1554 ierr = PetscFree(tmp);CHKERRQ(ierr); 1555 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1556 } else if (type == NORM_INFINITY) { /* max row norm */ 1557 PetscReal ntemp = 0.0; 1558 for (j=0; j<aij->A->rmap->n; j++) { 1559 v = amat->a + amat->i[j]; 1560 sum = 0.0; 1561 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1562 sum += PetscAbsScalar(*v); v++; 1563 } 1564 v = bmat->a + bmat->i[j]; 1565 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1566 sum += PetscAbsScalar(*v); v++; 1567 } 1568 if (sum > ntemp) ntemp = sum; 1569 } 1570 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1571 } else { 1572 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1573 } 1574 } 1575 PetscFunctionReturn(0); 1576 } 1577 1578 #undef __FUNCT__ 1579 #define __FUNCT__ "MatTranspose_MPIAIJ" 1580 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1581 { 1582 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1583 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1584 PetscErrorCode ierr; 1585 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1586 PetscInt cstart=A->cmap->rstart,ncol; 1587 Mat B; 1588 MatScalar *array; 1589 1590 PetscFunctionBegin; 1591 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1592 1593 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1594 ai = Aloc->i; aj = Aloc->j; 1595 bi = Bloc->i; bj = Bloc->j; 1596 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1597 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1598 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1599 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1600 for (i=0; i<ai[ma]; i++){ 1601 d_nnz[aj[i]] ++; 1602 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1603 } 1604 1605 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1606 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1607 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1608 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1609 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1610 } else { 1611 B = *matout; 1612 } 1613 1614 /* copy over the A part */ 1615 array = Aloc->a; 1616 row = A->rmap->rstart; 1617 for (i=0; i<ma; i++) { 1618 ncol = ai[i+1]-ai[i]; 1619 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1620 row++; array += ncol; aj += ncol; 1621 } 1622 aj = Aloc->j; 1623 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1624 1625 /* copy over the B part */ 1626 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1627 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1628 array = Bloc->a; 1629 row = A->rmap->rstart; 1630 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1631 cols_tmp = cols; 1632 for (i=0; i<mb; i++) { 1633 ncol = bi[i+1]-bi[i]; 1634 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1635 row++; array += ncol; cols_tmp += ncol; 1636 } 1637 ierr = PetscFree(cols);CHKERRQ(ierr); 1638 1639 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1640 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1641 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1642 *matout = B; 1643 } else { 1644 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1645 } 1646 PetscFunctionReturn(0); 1647 } 1648 1649 #undef __FUNCT__ 1650 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1651 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1652 { 1653 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1654 Mat a = aij->A,b = aij->B; 1655 PetscErrorCode ierr; 1656 PetscInt s1,s2,s3; 1657 1658 PetscFunctionBegin; 1659 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1660 if (rr) { 1661 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1662 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1663 /* Overlap communication with computation. */ 1664 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1665 } 1666 if (ll) { 1667 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1668 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1669 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1670 } 1671 /* scale the diagonal block */ 1672 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1673 1674 if (rr) { 1675 /* Do a scatter end and then right scale the off-diagonal block */ 1676 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1677 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1678 } 1679 1680 PetscFunctionReturn(0); 1681 } 1682 1683 #undef __FUNCT__ 1684 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1685 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1686 { 1687 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1688 PetscErrorCode ierr; 1689 1690 PetscFunctionBegin; 1691 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1692 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1693 PetscFunctionReturn(0); 1694 } 1695 #undef __FUNCT__ 1696 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1697 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1698 { 1699 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1700 PetscErrorCode ierr; 1701 1702 PetscFunctionBegin; 1703 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1704 PetscFunctionReturn(0); 1705 } 1706 1707 #undef __FUNCT__ 1708 #define __FUNCT__ "MatEqual_MPIAIJ" 1709 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1710 { 1711 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1712 Mat a,b,c,d; 1713 PetscTruth flg; 1714 PetscErrorCode ierr; 1715 1716 PetscFunctionBegin; 1717 a = matA->A; b = matA->B; 1718 c = matB->A; d = matB->B; 1719 1720 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1721 if (flg) { 1722 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1723 } 1724 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1725 PetscFunctionReturn(0); 1726 } 1727 1728 #undef __FUNCT__ 1729 #define __FUNCT__ "MatCopy_MPIAIJ" 1730 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1731 { 1732 PetscErrorCode ierr; 1733 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1734 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1735 1736 PetscFunctionBegin; 1737 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1738 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1739 /* because of the column compression in the off-processor part of the matrix a->B, 1740 the number of columns in a->B and b->B may be different, hence we cannot call 1741 the MatCopy() directly on the two parts. If need be, we can provide a more 1742 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1743 then copying the submatrices */ 1744 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1745 } else { 1746 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1747 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1748 } 1749 PetscFunctionReturn(0); 1750 } 1751 1752 #undef __FUNCT__ 1753 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1754 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1755 { 1756 PetscErrorCode ierr; 1757 1758 PetscFunctionBegin; 1759 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1760 PetscFunctionReturn(0); 1761 } 1762 1763 #include "petscblaslapack.h" 1764 #undef __FUNCT__ 1765 #define __FUNCT__ "MatAXPY_MPIAIJ" 1766 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1767 { 1768 PetscErrorCode ierr; 1769 PetscInt i; 1770 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1771 PetscBLASInt bnz,one=1; 1772 Mat_SeqAIJ *x,*y; 1773 1774 PetscFunctionBegin; 1775 if (str == SAME_NONZERO_PATTERN) { 1776 PetscScalar alpha = a; 1777 x = (Mat_SeqAIJ *)xx->A->data; 1778 y = (Mat_SeqAIJ *)yy->A->data; 1779 bnz = PetscBLASIntCast(x->nz); 1780 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1781 x = (Mat_SeqAIJ *)xx->B->data; 1782 y = (Mat_SeqAIJ *)yy->B->data; 1783 bnz = PetscBLASIntCast(x->nz); 1784 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1785 } else if (str == SUBSET_NONZERO_PATTERN) { 1786 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1787 1788 x = (Mat_SeqAIJ *)xx->B->data; 1789 y = (Mat_SeqAIJ *)yy->B->data; 1790 if (y->xtoy && y->XtoY != xx->B) { 1791 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1792 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1793 } 1794 if (!y->xtoy) { /* get xtoy */ 1795 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1796 y->XtoY = xx->B; 1797 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1798 } 1799 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1800 } else { 1801 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1802 } 1803 PetscFunctionReturn(0); 1804 } 1805 1806 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1807 1808 #undef __FUNCT__ 1809 #define __FUNCT__ "MatConjugate_MPIAIJ" 1810 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1811 { 1812 #if defined(PETSC_USE_COMPLEX) 1813 PetscErrorCode ierr; 1814 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1815 1816 PetscFunctionBegin; 1817 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1818 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1819 #else 1820 PetscFunctionBegin; 1821 #endif 1822 PetscFunctionReturn(0); 1823 } 1824 1825 #undef __FUNCT__ 1826 #define __FUNCT__ "MatRealPart_MPIAIJ" 1827 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1828 { 1829 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1830 PetscErrorCode ierr; 1831 1832 PetscFunctionBegin; 1833 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1834 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1835 PetscFunctionReturn(0); 1836 } 1837 1838 #undef __FUNCT__ 1839 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1840 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1841 { 1842 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1843 PetscErrorCode ierr; 1844 1845 PetscFunctionBegin; 1846 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1847 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1848 PetscFunctionReturn(0); 1849 } 1850 1851 #ifdef PETSC_HAVE_PBGL 1852 1853 #include <boost/parallel/mpi/bsp_process_group.hpp> 1854 #include <boost/graph/distributed/ilu_default_graph.hpp> 1855 #include <boost/graph/distributed/ilu_0_block.hpp> 1856 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1857 #include <boost/graph/distributed/petsc/interface.hpp> 1858 #include <boost/multi_array.hpp> 1859 #include <boost/parallel/distributed_property_map->hpp> 1860 1861 #undef __FUNCT__ 1862 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1863 /* 1864 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1865 */ 1866 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1867 { 1868 namespace petsc = boost::distributed::petsc; 1869 1870 namespace graph_dist = boost::graph::distributed; 1871 using boost::graph::distributed::ilu_default::process_group_type; 1872 using boost::graph::ilu_permuted; 1873 1874 PetscTruth row_identity, col_identity; 1875 PetscContainer c; 1876 PetscInt m, n, M, N; 1877 PetscErrorCode ierr; 1878 1879 PetscFunctionBegin; 1880 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1881 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1882 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1883 if (!row_identity || !col_identity) { 1884 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1885 } 1886 1887 process_group_type pg; 1888 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1889 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1890 lgraph_type& level_graph = *lgraph_p; 1891 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1892 1893 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1894 ilu_permuted(level_graph); 1895 1896 /* put together the new matrix */ 1897 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 1898 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1899 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1900 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 1901 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 1902 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1903 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1904 1905 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 1906 ierr = PetscContainerSetPointer(c, lgraph_p); 1907 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 1908 PetscFunctionReturn(0); 1909 } 1910 1911 #undef __FUNCT__ 1912 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1913 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 1914 { 1915 PetscFunctionBegin; 1916 PetscFunctionReturn(0); 1917 } 1918 1919 #undef __FUNCT__ 1920 #define __FUNCT__ "MatSolve_MPIAIJ" 1921 /* 1922 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1923 */ 1924 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1925 { 1926 namespace graph_dist = boost::graph::distributed; 1927 1928 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1929 lgraph_type* lgraph_p; 1930 PetscContainer c; 1931 PetscErrorCode ierr; 1932 1933 PetscFunctionBegin; 1934 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1935 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1936 ierr = VecCopy(b, x); CHKERRQ(ierr); 1937 1938 PetscScalar* array_x; 1939 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1940 PetscInt sx; 1941 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 1942 1943 PetscScalar* array_b; 1944 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 1945 PetscInt sb; 1946 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 1947 1948 lgraph_type& level_graph = *lgraph_p; 1949 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1950 1951 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 1952 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 1953 ref_x(array_x, boost::extents[num_vertices(graph)]); 1954 1955 typedef boost::iterator_property_map<array_ref_type::iterator, 1956 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 1957 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 1958 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 1959 1960 ilu_set_solve(*lgraph_p, vector_b, vector_x); 1961 1962 PetscFunctionReturn(0); 1963 } 1964 #endif 1965 1966 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 1967 PetscInt nzlocal,nsends,nrecvs; 1968 PetscMPIInt *send_rank; 1969 PetscInt *sbuf_nz,*sbuf_j,**rbuf_j; 1970 PetscScalar *sbuf_a,**rbuf_a; 1971 PetscErrorCode (*MatDestroy)(Mat); 1972 } Mat_Redundant; 1973 1974 #undef __FUNCT__ 1975 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 1976 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 1977 { 1978 PetscErrorCode ierr; 1979 Mat_Redundant *redund=(Mat_Redundant*)ptr; 1980 PetscInt i; 1981 1982 PetscFunctionBegin; 1983 ierr = PetscFree(redund->send_rank);CHKERRQ(ierr); 1984 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 1985 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 1986 for (i=0; i<redund->nrecvs; i++){ 1987 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 1988 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 1989 } 1990 ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 1991 ierr = PetscFree(redund);CHKERRQ(ierr); 1992 PetscFunctionReturn(0); 1993 } 1994 1995 #undef __FUNCT__ 1996 #define __FUNCT__ "MatDestroy_MatRedundant" 1997 PetscErrorCode MatDestroy_MatRedundant(Mat A) 1998 { 1999 PetscErrorCode ierr; 2000 PetscContainer container; 2001 Mat_Redundant *redund=PETSC_NULL; 2002 2003 PetscFunctionBegin; 2004 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2005 if (container) { 2006 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2007 } else { 2008 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2009 } 2010 A->ops->destroy = redund->MatDestroy; 2011 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2012 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2013 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 2014 PetscFunctionReturn(0); 2015 } 2016 2017 #undef __FUNCT__ 2018 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2019 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2020 { 2021 PetscMPIInt rank,size; 2022 MPI_Comm comm=((PetscObject)mat)->comm; 2023 PetscErrorCode ierr; 2024 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2025 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2026 PetscInt *rowrange=mat->rmap->range; 2027 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2028 Mat A=aij->A,B=aij->B,C=*matredundant; 2029 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2030 PetscScalar *sbuf_a; 2031 PetscInt nzlocal=a->nz+b->nz; 2032 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2033 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2034 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2035 MatScalar *aworkA,*aworkB; 2036 PetscScalar *vals; 2037 PetscMPIInt tag1,tag2,tag3,imdex; 2038 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2039 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2040 MPI_Status recv_status,*send_status; 2041 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2042 PetscInt **rbuf_j=PETSC_NULL; 2043 PetscScalar **rbuf_a=PETSC_NULL; 2044 Mat_Redundant *redund=PETSC_NULL; 2045 PetscContainer container; 2046 2047 PetscFunctionBegin; 2048 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2049 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2050 2051 if (reuse == MAT_REUSE_MATRIX) { 2052 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2053 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2054 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2055 if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2056 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2057 if (container) { 2058 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2059 } else { 2060 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2061 } 2062 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2063 2064 nsends = redund->nsends; 2065 nrecvs = redund->nrecvs; 2066 send_rank = redund->send_rank; recv_rank = send_rank + size; 2067 sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends; 2068 sbuf_j = redund->sbuf_j; 2069 sbuf_a = redund->sbuf_a; 2070 rbuf_j = redund->rbuf_j; 2071 rbuf_a = redund->rbuf_a; 2072 } 2073 2074 if (reuse == MAT_INITIAL_MATRIX){ 2075 PetscMPIInt subrank,subsize; 2076 PetscInt nleftover,np_subcomm; 2077 /* get the destination processors' id send_rank, nsends and nrecvs */ 2078 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2079 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2080 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 2081 recv_rank = send_rank + size; 2082 np_subcomm = size/nsubcomm; 2083 nleftover = size - nsubcomm*np_subcomm; 2084 nsends = 0; nrecvs = 0; 2085 for (i=0; i<size; i++){ /* i=rank*/ 2086 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2087 send_rank[nsends] = i; nsends++; 2088 recv_rank[nrecvs++] = i; 2089 } 2090 } 2091 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2092 i = size-nleftover-1; 2093 j = 0; 2094 while (j < nsubcomm - nleftover){ 2095 send_rank[nsends++] = i; 2096 i--; j++; 2097 } 2098 } 2099 2100 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2101 for (i=0; i<nleftover; i++){ 2102 recv_rank[nrecvs++] = size-nleftover+i; 2103 } 2104 } 2105 2106 /* allocate sbuf_j, sbuf_a */ 2107 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2108 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2109 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2110 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2111 2112 /* copy mat's local entries into the buffers */ 2113 if (reuse == MAT_INITIAL_MATRIX){ 2114 rownz_max = 0; 2115 rptr = sbuf_j; 2116 cols = sbuf_j + rend-rstart + 1; 2117 vals = sbuf_a; 2118 rptr[0] = 0; 2119 for (i=0; i<rend-rstart; i++){ 2120 row = i + rstart; 2121 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2122 ncols = nzA + nzB; 2123 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2124 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2125 /* load the column indices for this row into cols */ 2126 lwrite = 0; 2127 for (l=0; l<nzB; l++) { 2128 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2129 vals[lwrite] = aworkB[l]; 2130 cols[lwrite++] = ctmp; 2131 } 2132 } 2133 for (l=0; l<nzA; l++){ 2134 vals[lwrite] = aworkA[l]; 2135 cols[lwrite++] = cstart + cworkA[l]; 2136 } 2137 for (l=0; l<nzB; l++) { 2138 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2139 vals[lwrite] = aworkB[l]; 2140 cols[lwrite++] = ctmp; 2141 } 2142 } 2143 vals += ncols; 2144 cols += ncols; 2145 rptr[i+1] = rptr[i] + ncols; 2146 if (rownz_max < ncols) rownz_max = ncols; 2147 } 2148 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2149 } else { /* only copy matrix values into sbuf_a */ 2150 rptr = sbuf_j; 2151 vals = sbuf_a; 2152 rptr[0] = 0; 2153 for (i=0; i<rend-rstart; i++){ 2154 row = i + rstart; 2155 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2156 ncols = nzA + nzB; 2157 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2158 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2159 lwrite = 0; 2160 for (l=0; l<nzB; l++) { 2161 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2162 } 2163 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2164 for (l=0; l<nzB; l++) { 2165 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2166 } 2167 vals += ncols; 2168 rptr[i+1] = rptr[i] + ncols; 2169 } 2170 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2171 2172 /* send nzlocal to others, and recv other's nzlocal */ 2173 /*--------------------------------------------------*/ 2174 if (reuse == MAT_INITIAL_MATRIX){ 2175 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2176 s_waits2 = s_waits3 + nsends; 2177 s_waits1 = s_waits2 + nsends; 2178 r_waits1 = s_waits1 + nsends; 2179 r_waits2 = r_waits1 + nrecvs; 2180 r_waits3 = r_waits2 + nrecvs; 2181 } else { 2182 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2183 r_waits3 = s_waits3 + nsends; 2184 } 2185 2186 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2187 if (reuse == MAT_INITIAL_MATRIX){ 2188 /* get new tags to keep the communication clean */ 2189 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2190 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2191 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2192 rbuf_nz = sbuf_nz + nsends; 2193 2194 /* post receives of other's nzlocal */ 2195 for (i=0; i<nrecvs; i++){ 2196 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2197 } 2198 /* send nzlocal to others */ 2199 for (i=0; i<nsends; i++){ 2200 sbuf_nz[i] = nzlocal; 2201 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2202 } 2203 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2204 count = nrecvs; 2205 while (count) { 2206 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2207 recv_rank[imdex] = recv_status.MPI_SOURCE; 2208 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2209 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2210 2211 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2212 rbuf_nz[imdex] += i + 2; 2213 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2214 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2215 count--; 2216 } 2217 /* wait on sends of nzlocal */ 2218 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2219 /* send mat->i,j to others, and recv from other's */ 2220 /*------------------------------------------------*/ 2221 for (i=0; i<nsends; i++){ 2222 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2223 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2224 } 2225 /* wait on receives of mat->i,j */ 2226 /*------------------------------*/ 2227 count = nrecvs; 2228 while (count) { 2229 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2230 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2231 count--; 2232 } 2233 /* wait on sends of mat->i,j */ 2234 /*---------------------------*/ 2235 if (nsends) { 2236 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2237 } 2238 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2239 2240 /* post receives, send and receive mat->a */ 2241 /*----------------------------------------*/ 2242 for (imdex=0; imdex<nrecvs; imdex++) { 2243 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2244 } 2245 for (i=0; i<nsends; i++){ 2246 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2247 } 2248 count = nrecvs; 2249 while (count) { 2250 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2251 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2252 count--; 2253 } 2254 if (nsends) { 2255 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2256 } 2257 2258 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2259 2260 /* create redundant matrix */ 2261 /*-------------------------*/ 2262 if (reuse == MAT_INITIAL_MATRIX){ 2263 /* compute rownz_max for preallocation */ 2264 for (imdex=0; imdex<nrecvs; imdex++){ 2265 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2266 rptr = rbuf_j[imdex]; 2267 for (i=0; i<j; i++){ 2268 ncols = rptr[i+1] - rptr[i]; 2269 if (rownz_max < ncols) rownz_max = ncols; 2270 } 2271 } 2272 2273 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2274 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2275 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2276 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2277 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2278 } else { 2279 C = *matredundant; 2280 } 2281 2282 /* insert local matrix entries */ 2283 rptr = sbuf_j; 2284 cols = sbuf_j + rend-rstart + 1; 2285 vals = sbuf_a; 2286 for (i=0; i<rend-rstart; i++){ 2287 row = i + rstart; 2288 ncols = rptr[i+1] - rptr[i]; 2289 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2290 vals += ncols; 2291 cols += ncols; 2292 } 2293 /* insert received matrix entries */ 2294 for (imdex=0; imdex<nrecvs; imdex++){ 2295 rstart = rowrange[recv_rank[imdex]]; 2296 rend = rowrange[recv_rank[imdex]+1]; 2297 rptr = rbuf_j[imdex]; 2298 cols = rbuf_j[imdex] + rend-rstart + 1; 2299 vals = rbuf_a[imdex]; 2300 for (i=0; i<rend-rstart; i++){ 2301 row = i + rstart; 2302 ncols = rptr[i+1] - rptr[i]; 2303 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2304 vals += ncols; 2305 cols += ncols; 2306 } 2307 } 2308 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2309 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2310 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2311 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2312 if (reuse == MAT_INITIAL_MATRIX){ 2313 PetscContainer container; 2314 *matredundant = C; 2315 /* create a supporting struct and attach it to C for reuse */ 2316 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2317 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2318 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2319 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2320 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2321 2322 redund->nzlocal = nzlocal; 2323 redund->nsends = nsends; 2324 redund->nrecvs = nrecvs; 2325 redund->send_rank = send_rank; 2326 redund->sbuf_nz = sbuf_nz; 2327 redund->sbuf_j = sbuf_j; 2328 redund->sbuf_a = sbuf_a; 2329 redund->rbuf_j = rbuf_j; 2330 redund->rbuf_a = rbuf_a; 2331 2332 redund->MatDestroy = C->ops->destroy; 2333 C->ops->destroy = MatDestroy_MatRedundant; 2334 } 2335 PetscFunctionReturn(0); 2336 } 2337 2338 #undef __FUNCT__ 2339 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2340 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2341 { 2342 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2343 PetscErrorCode ierr; 2344 PetscInt i,*idxb = 0; 2345 PetscScalar *va,*vb; 2346 Vec vtmp; 2347 2348 PetscFunctionBegin; 2349 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2350 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2351 if (idx) { 2352 for (i=0; i<A->rmap->n; i++) { 2353 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2354 } 2355 } 2356 2357 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2358 if (idx) { 2359 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2360 } 2361 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2362 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2363 2364 for (i=0; i<A->rmap->n; i++){ 2365 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2366 va[i] = vb[i]; 2367 if (idx) idx[i] = a->garray[idxb[i]]; 2368 } 2369 } 2370 2371 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2372 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2373 if (idxb) { 2374 ierr = PetscFree(idxb);CHKERRQ(ierr); 2375 } 2376 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2377 PetscFunctionReturn(0); 2378 } 2379 2380 #undef __FUNCT__ 2381 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2382 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2383 { 2384 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2385 PetscErrorCode ierr; 2386 PetscInt i,*idxb = 0; 2387 PetscScalar *va,*vb; 2388 Vec vtmp; 2389 2390 PetscFunctionBegin; 2391 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2392 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2393 if (idx) { 2394 for (i=0; i<A->cmap->n; i++) { 2395 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2396 } 2397 } 2398 2399 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2400 if (idx) { 2401 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2402 } 2403 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2404 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2405 2406 for (i=0; i<A->rmap->n; i++){ 2407 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2408 va[i] = vb[i]; 2409 if (idx) idx[i] = a->garray[idxb[i]]; 2410 } 2411 } 2412 2413 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2414 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2415 if (idxb) { 2416 ierr = PetscFree(idxb);CHKERRQ(ierr); 2417 } 2418 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2419 PetscFunctionReturn(0); 2420 } 2421 2422 #undef __FUNCT__ 2423 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2424 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2425 { 2426 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2427 PetscInt n = A->rmap->n; 2428 PetscInt cstart = A->cmap->rstart; 2429 PetscInt *cmap = mat->garray; 2430 PetscInt *diagIdx, *offdiagIdx; 2431 Vec diagV, offdiagV; 2432 PetscScalar *a, *diagA, *offdiagA; 2433 PetscInt r; 2434 PetscErrorCode ierr; 2435 2436 PetscFunctionBegin; 2437 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2438 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2439 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2440 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2441 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2442 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2443 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2444 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2445 for(r = 0; r < n; ++r) { 2446 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2447 a[r] = diagA[r]; 2448 idx[r] = cstart + diagIdx[r]; 2449 } else { 2450 a[r] = offdiagA[r]; 2451 idx[r] = cmap[offdiagIdx[r]]; 2452 } 2453 } 2454 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2455 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2456 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2457 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2458 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2459 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2460 PetscFunctionReturn(0); 2461 } 2462 2463 #undef __FUNCT__ 2464 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2465 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2466 { 2467 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2468 PetscInt n = A->rmap->n; 2469 PetscInt cstart = A->cmap->rstart; 2470 PetscInt *cmap = mat->garray; 2471 PetscInt *diagIdx, *offdiagIdx; 2472 Vec diagV, offdiagV; 2473 PetscScalar *a, *diagA, *offdiagA; 2474 PetscInt r; 2475 PetscErrorCode ierr; 2476 2477 PetscFunctionBegin; 2478 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2479 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2480 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2481 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2482 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2483 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2484 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2485 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2486 for(r = 0; r < n; ++r) { 2487 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2488 a[r] = diagA[r]; 2489 idx[r] = cstart + diagIdx[r]; 2490 } else { 2491 a[r] = offdiagA[r]; 2492 idx[r] = cmap[offdiagIdx[r]]; 2493 } 2494 } 2495 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2496 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2497 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2498 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2499 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2500 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2501 PetscFunctionReturn(0); 2502 } 2503 2504 #undef __FUNCT__ 2505 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2506 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[]) 2507 { 2508 PetscErrorCode ierr; 2509 2510 PetscFunctionBegin; 2511 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 2512 PetscFunctionReturn(0); 2513 } 2514 2515 /* -------------------------------------------------------------------*/ 2516 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2517 MatGetRow_MPIAIJ, 2518 MatRestoreRow_MPIAIJ, 2519 MatMult_MPIAIJ, 2520 /* 4*/ MatMultAdd_MPIAIJ, 2521 MatMultTranspose_MPIAIJ, 2522 MatMultTransposeAdd_MPIAIJ, 2523 #ifdef PETSC_HAVE_PBGL 2524 MatSolve_MPIAIJ, 2525 #else 2526 0, 2527 #endif 2528 0, 2529 0, 2530 /*10*/ 0, 2531 0, 2532 0, 2533 MatRelax_MPIAIJ, 2534 MatTranspose_MPIAIJ, 2535 /*15*/ MatGetInfo_MPIAIJ, 2536 MatEqual_MPIAIJ, 2537 MatGetDiagonal_MPIAIJ, 2538 MatDiagonalScale_MPIAIJ, 2539 MatNorm_MPIAIJ, 2540 /*20*/ MatAssemblyBegin_MPIAIJ, 2541 MatAssemblyEnd_MPIAIJ, 2542 0, 2543 MatSetOption_MPIAIJ, 2544 MatZeroEntries_MPIAIJ, 2545 /*25*/ MatZeroRows_MPIAIJ, 2546 0, 2547 #ifdef PETSC_HAVE_PBGL 2548 0, 2549 #else 2550 0, 2551 #endif 2552 0, 2553 0, 2554 /*30*/ MatSetUpPreallocation_MPIAIJ, 2555 #ifdef PETSC_HAVE_PBGL 2556 0, 2557 #else 2558 0, 2559 #endif 2560 0, 2561 0, 2562 0, 2563 /*35*/ MatDuplicate_MPIAIJ, 2564 0, 2565 0, 2566 0, 2567 0, 2568 /*40*/ MatAXPY_MPIAIJ, 2569 MatGetSubMatrices_MPIAIJ, 2570 MatIncreaseOverlap_MPIAIJ, 2571 MatGetValues_MPIAIJ, 2572 MatCopy_MPIAIJ, 2573 /*45*/ MatGetRowMax_MPIAIJ, 2574 MatScale_MPIAIJ, 2575 0, 2576 0, 2577 0, 2578 /*50*/ MatSetBlockSize_MPIAIJ, 2579 0, 2580 0, 2581 0, 2582 0, 2583 /*55*/ MatFDColoringCreate_MPIAIJ, 2584 0, 2585 MatSetUnfactored_MPIAIJ, 2586 MatPermute_MPIAIJ, 2587 0, 2588 /*60*/ MatGetSubMatrix_MPIAIJ, 2589 MatDestroy_MPIAIJ, 2590 MatView_MPIAIJ, 2591 0, 2592 0, 2593 /*65*/ 0, 2594 0, 2595 0, 2596 0, 2597 0, 2598 /*70*/ MatGetRowMaxAbs_MPIAIJ, 2599 MatGetRowMinAbs_MPIAIJ, 2600 0, 2601 MatSetColoring_MPIAIJ, 2602 #if defined(PETSC_HAVE_ADIC) 2603 MatSetValuesAdic_MPIAIJ, 2604 #else 2605 0, 2606 #endif 2607 MatSetValuesAdifor_MPIAIJ, 2608 /*75*/ 0, 2609 0, 2610 0, 2611 0, 2612 0, 2613 /*80*/ 0, 2614 0, 2615 0, 2616 /*84*/ MatLoad_MPIAIJ, 2617 0, 2618 0, 2619 0, 2620 0, 2621 0, 2622 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 2623 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2624 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2625 MatPtAP_Basic, 2626 MatPtAPSymbolic_MPIAIJ, 2627 /*95*/ MatPtAPNumeric_MPIAIJ, 2628 0, 2629 0, 2630 0, 2631 0, 2632 /*100*/0, 2633 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2634 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2635 MatConjugate_MPIAIJ, 2636 0, 2637 /*105*/MatSetValuesRow_MPIAIJ, 2638 MatRealPart_MPIAIJ, 2639 MatImaginaryPart_MPIAIJ, 2640 0, 2641 0, 2642 /*110*/0, 2643 MatGetRedundantMatrix_MPIAIJ, 2644 MatGetRowMin_MPIAIJ, 2645 0, 2646 0, 2647 /*115*/MatGetSeqNonzerostructure_MPIAIJ}; 2648 2649 /* ----------------------------------------------------------------------------------------*/ 2650 2651 EXTERN_C_BEGIN 2652 #undef __FUNCT__ 2653 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2654 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2655 { 2656 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2657 PetscErrorCode ierr; 2658 2659 PetscFunctionBegin; 2660 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2661 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2662 PetscFunctionReturn(0); 2663 } 2664 EXTERN_C_END 2665 2666 EXTERN_C_BEGIN 2667 #undef __FUNCT__ 2668 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2669 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2670 { 2671 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2672 PetscErrorCode ierr; 2673 2674 PetscFunctionBegin; 2675 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2676 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2677 PetscFunctionReturn(0); 2678 } 2679 EXTERN_C_END 2680 2681 #include "petscpc.h" 2682 EXTERN_C_BEGIN 2683 #undef __FUNCT__ 2684 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2685 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2686 { 2687 Mat_MPIAIJ *b; 2688 PetscErrorCode ierr; 2689 PetscInt i; 2690 2691 PetscFunctionBegin; 2692 B->preallocated = PETSC_TRUE; 2693 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2694 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2695 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2696 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2697 2698 ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr); 2699 ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr); 2700 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 2701 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 2702 if (d_nnz) { 2703 for (i=0; i<B->rmap->n; i++) { 2704 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2705 } 2706 } 2707 if (o_nnz) { 2708 for (i=0; i<B->rmap->n; i++) { 2709 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2710 } 2711 } 2712 b = (Mat_MPIAIJ*)B->data; 2713 2714 /* Explicitly create 2 MATSEQAIJ matrices. */ 2715 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2716 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2717 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2718 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2719 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2720 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2721 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2722 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2723 2724 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2725 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2726 2727 PetscFunctionReturn(0); 2728 } 2729 EXTERN_C_END 2730 2731 #undef __FUNCT__ 2732 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2733 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2734 { 2735 Mat mat; 2736 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2737 PetscErrorCode ierr; 2738 2739 PetscFunctionBegin; 2740 *newmat = 0; 2741 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2742 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2743 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2744 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2745 a = (Mat_MPIAIJ*)mat->data; 2746 2747 mat->factor = matin->factor; 2748 mat->rmap->bs = matin->rmap->bs; 2749 mat->assembled = PETSC_TRUE; 2750 mat->insertmode = NOT_SET_VALUES; 2751 mat->preallocated = PETSC_TRUE; 2752 2753 a->size = oldmat->size; 2754 a->rank = oldmat->rank; 2755 a->donotstash = oldmat->donotstash; 2756 a->roworiented = oldmat->roworiented; 2757 a->rowindices = 0; 2758 a->rowvalues = 0; 2759 a->getrowactive = PETSC_FALSE; 2760 2761 ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr); 2762 ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr); 2763 2764 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr); 2765 if (oldmat->colmap) { 2766 #if defined (PETSC_USE_CTABLE) 2767 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2768 #else 2769 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2770 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2771 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2772 #endif 2773 } else a->colmap = 0; 2774 if (oldmat->garray) { 2775 PetscInt len; 2776 len = oldmat->B->cmap->n; 2777 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2778 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2779 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2780 } else a->garray = 0; 2781 2782 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2783 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2784 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2785 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2786 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2787 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2788 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2789 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2790 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2791 *newmat = mat; 2792 PetscFunctionReturn(0); 2793 } 2794 2795 #include "petscsys.h" 2796 2797 #undef __FUNCT__ 2798 #define __FUNCT__ "MatLoad_MPIAIJ" 2799 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 2800 { 2801 Mat A; 2802 PetscScalar *vals,*svals; 2803 MPI_Comm comm = ((PetscObject)viewer)->comm; 2804 MPI_Status status; 2805 PetscErrorCode ierr; 2806 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz; 2807 PetscInt i,nz,j,rstart,rend,mmax,maxnz; 2808 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2809 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2810 PetscInt cend,cstart,n,*rowners; 2811 int fd; 2812 2813 PetscFunctionBegin; 2814 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2815 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2816 if (!rank) { 2817 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2818 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2819 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2820 } 2821 2822 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2823 M = header[1]; N = header[2]; 2824 /* determine ownership of all rows */ 2825 m = M/size + ((M % size) > rank); 2826 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2827 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2828 2829 /* First process needs enough room for process with most rows */ 2830 if (!rank) { 2831 mmax = rowners[1]; 2832 for (i=2; i<size; i++) { 2833 mmax = PetscMax(mmax,rowners[i]); 2834 } 2835 } else mmax = m; 2836 2837 rowners[0] = 0; 2838 for (i=2; i<=size; i++) { 2839 rowners[i] += rowners[i-1]; 2840 } 2841 rstart = rowners[rank]; 2842 rend = rowners[rank+1]; 2843 2844 /* distribute row lengths to all processors */ 2845 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2846 if (!rank) { 2847 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2848 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2849 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2850 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2851 for (j=0; j<m; j++) { 2852 procsnz[0] += ourlens[j]; 2853 } 2854 for (i=1; i<size; i++) { 2855 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2856 /* calculate the number of nonzeros on each processor */ 2857 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2858 procsnz[i] += rowlengths[j]; 2859 } 2860 mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]); 2861 ierr = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2862 } 2863 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2864 } else { 2865 mpicnt = PetscMPIIntCast(m);CHKERRQ(ierr); 2866 ierr = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2867 } 2868 2869 if (!rank) { 2870 /* determine max buffer needed and allocate it */ 2871 maxnz = 0; 2872 for (i=0; i<size; i++) { 2873 maxnz = PetscMax(maxnz,procsnz[i]); 2874 } 2875 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2876 2877 /* read in my part of the matrix column indices */ 2878 nz = procsnz[0]; 2879 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2880 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2881 2882 /* read in every one elses and ship off */ 2883 for (i=1; i<size; i++) { 2884 nz = procsnz[i]; 2885 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2886 mpicnt = PetscMPIIntCast(nz); 2887 ierr = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2888 } 2889 ierr = PetscFree(cols);CHKERRQ(ierr); 2890 } else { 2891 /* determine buffer space needed for message */ 2892 nz = 0; 2893 for (i=0; i<m; i++) { 2894 nz += ourlens[i]; 2895 } 2896 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2897 2898 /* receive message of column indices*/ 2899 mpicnt = PetscMPIIntCast(nz);CHKERRQ(ierr); 2900 ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2901 ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr); 2902 if (status.MPI_ERROR) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() status has error code %d, expected length %d",status.MPI_ERROR,mpicnt);} 2903 else if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 2904 else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);} 2905 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 2906 } 2907 2908 /* determine column ownership if matrix is not square */ 2909 if (N != M) { 2910 n = N/size + ((N % size) > rank); 2911 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2912 cstart = cend - n; 2913 } else { 2914 cstart = rstart; 2915 cend = rend; 2916 n = cend - cstart; 2917 } 2918 2919 /* loop over local rows, determining number of off diagonal entries */ 2920 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2921 jj = 0; 2922 for (i=0; i<m; i++) { 2923 for (j=0; j<ourlens[i]; j++) { 2924 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2925 jj++; 2926 } 2927 } 2928 2929 /* create our matrix */ 2930 for (i=0; i<m; i++) { 2931 ourlens[i] -= offlens[i]; 2932 } 2933 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2934 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2935 ierr = MatSetType(A,type);CHKERRQ(ierr); 2936 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2937 2938 for (i=0; i<m; i++) { 2939 ourlens[i] += offlens[i]; 2940 } 2941 2942 if (!rank) { 2943 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2944 2945 /* read in my part of the matrix numerical values */ 2946 nz = procsnz[0]; 2947 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2948 2949 /* insert into matrix */ 2950 jj = rstart; 2951 smycols = mycols; 2952 svals = vals; 2953 for (i=0; i<m; i++) { 2954 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2955 smycols += ourlens[i]; 2956 svals += ourlens[i]; 2957 jj++; 2958 } 2959 2960 /* read in other processors and ship out */ 2961 for (i=1; i<size; i++) { 2962 nz = procsnz[i]; 2963 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2964 mpicnt = PetscMPIIntCast(nz); 2965 ierr = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 2966 } 2967 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2968 } else { 2969 /* receive numeric values */ 2970 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2971 2972 /* receive message of values*/ 2973 mpicnt = PetscMPIIntCast(nz); 2974 ierr = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 2975 ierr = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr); 2976 if (status.MPI_ERROR) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() status has error code %d, expected length %d",status.MPI_ERROR,mpicnt);} 2977 else if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 2978 else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);} 2979 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 2980 2981 /* insert into matrix */ 2982 jj = rstart; 2983 smycols = mycols; 2984 svals = vals; 2985 for (i=0; i<m; i++) { 2986 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2987 smycols += ourlens[i]; 2988 svals += ourlens[i]; 2989 jj++; 2990 } 2991 } 2992 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2993 ierr = PetscFree(vals);CHKERRQ(ierr); 2994 ierr = PetscFree(mycols);CHKERRQ(ierr); 2995 ierr = PetscFree(rowners);CHKERRQ(ierr); 2996 2997 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2998 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2999 *newmat = A; 3000 PetscFunctionReturn(0); 3001 } 3002 3003 #undef __FUNCT__ 3004 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3005 /* 3006 Not great since it makes two copies of the submatrix, first an SeqAIJ 3007 in local and then by concatenating the local matrices the end result. 3008 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3009 */ 3010 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3011 { 3012 PetscErrorCode ierr; 3013 PetscMPIInt rank,size; 3014 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3015 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3016 Mat *local,M,Mreuse; 3017 MatScalar *vwork,*aa; 3018 MPI_Comm comm = ((PetscObject)mat)->comm; 3019 Mat_SeqAIJ *aij; 3020 3021 3022 PetscFunctionBegin; 3023 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3024 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3025 3026 if (call == MAT_REUSE_MATRIX) { 3027 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3028 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3029 local = &Mreuse; 3030 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3031 } else { 3032 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3033 Mreuse = *local; 3034 ierr = PetscFree(local);CHKERRQ(ierr); 3035 } 3036 3037 /* 3038 m - number of local rows 3039 n - number of columns (same on all processors) 3040 rstart - first row in new global matrix generated 3041 */ 3042 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3043 if (call == MAT_INITIAL_MATRIX) { 3044 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3045 ii = aij->i; 3046 jj = aij->j; 3047 3048 /* 3049 Determine the number of non-zeros in the diagonal and off-diagonal 3050 portions of the matrix in order to do correct preallocation 3051 */ 3052 3053 /* first get start and end of "diagonal" columns */ 3054 if (csize == PETSC_DECIDE) { 3055 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3056 if (mglobal == n) { /* square matrix */ 3057 nlocal = m; 3058 } else { 3059 nlocal = n/size + ((n % size) > rank); 3060 } 3061 } else { 3062 nlocal = csize; 3063 } 3064 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3065 rstart = rend - nlocal; 3066 if (rank == size - 1 && rend != n) { 3067 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3068 } 3069 3070 /* next, compute all the lengths */ 3071 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3072 olens = dlens + m; 3073 for (i=0; i<m; i++) { 3074 jend = ii[i+1] - ii[i]; 3075 olen = 0; 3076 dlen = 0; 3077 for (j=0; j<jend; j++) { 3078 if (*jj < rstart || *jj >= rend) olen++; 3079 else dlen++; 3080 jj++; 3081 } 3082 olens[i] = olen; 3083 dlens[i] = dlen; 3084 } 3085 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3086 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3087 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3088 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3089 ierr = PetscFree(dlens);CHKERRQ(ierr); 3090 } else { 3091 PetscInt ml,nl; 3092 3093 M = *newmat; 3094 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3095 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3096 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3097 /* 3098 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3099 rather than the slower MatSetValues(). 3100 */ 3101 M->was_assembled = PETSC_TRUE; 3102 M->assembled = PETSC_FALSE; 3103 } 3104 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3105 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3106 ii = aij->i; 3107 jj = aij->j; 3108 aa = aij->a; 3109 for (i=0; i<m; i++) { 3110 row = rstart + i; 3111 nz = ii[i+1] - ii[i]; 3112 cwork = jj; jj += nz; 3113 vwork = aa; aa += nz; 3114 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3115 } 3116 3117 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3118 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3119 *newmat = M; 3120 3121 /* save submatrix used in processor for next request */ 3122 if (call == MAT_INITIAL_MATRIX) { 3123 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3124 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 3125 } 3126 3127 PetscFunctionReturn(0); 3128 } 3129 3130 EXTERN_C_BEGIN 3131 #undef __FUNCT__ 3132 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3133 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3134 { 3135 PetscInt m,cstart, cend,j,nnz,i,d; 3136 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3137 const PetscInt *JJ; 3138 PetscScalar *values; 3139 PetscErrorCode ierr; 3140 3141 PetscFunctionBegin; 3142 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3143 3144 ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3145 ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3146 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 3147 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 3148 m = B->rmap->n; 3149 cstart = B->cmap->rstart; 3150 cend = B->cmap->rend; 3151 rstart = B->rmap->rstart; 3152 3153 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 3154 o_nnz = d_nnz + m; 3155 3156 #if defined(PETSC_USE_DEBUGGING) 3157 for (i=0; i<m; i++) { 3158 nnz = Ii[i+1]- Ii[i]; 3159 JJ = J + Ii[i]; 3160 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3161 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3162 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3163 for (j=1; j<nnz; j++) { 3164 if (JJ[i] <= JJ[i-1]) SETERRRQ(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at %D location in column indices",i,j); 3165 } 3166 } 3167 #endif 3168 3169 for (i=0; i<m; i++) { 3170 nnz = Ii[i+1]- Ii[i]; 3171 JJ = J + Ii[i]; 3172 nnz_max = PetscMax(nnz_max,nnz); 3173 for (j=0; j<nnz; j++) { 3174 if (*JJ >= cstart) break; 3175 JJ++; 3176 } 3177 d = 0; 3178 for (; j<nnz; j++) { 3179 if (*JJ++ >= cend) break; 3180 d++; 3181 } 3182 d_nnz[i] = d; 3183 o_nnz[i] = nnz - d; 3184 } 3185 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3186 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 3187 3188 if (v) values = (PetscScalar*)v; 3189 else { 3190 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3191 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3192 } 3193 3194 for (i=0; i<m; i++) { 3195 ii = i + rstart; 3196 nnz = Ii[i+1]- Ii[i]; 3197 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3198 } 3199 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3200 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3201 3202 if (!v) { 3203 ierr = PetscFree(values);CHKERRQ(ierr); 3204 } 3205 PetscFunctionReturn(0); 3206 } 3207 EXTERN_C_END 3208 3209 #undef __FUNCT__ 3210 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3211 /*@ 3212 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3213 (the default parallel PETSc format). 3214 3215 Collective on MPI_Comm 3216 3217 Input Parameters: 3218 + B - the matrix 3219 . i - the indices into j for the start of each local row (starts with zero) 3220 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3221 - v - optional values in the matrix 3222 3223 Level: developer 3224 3225 Notes: 3226 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3227 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3228 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3229 3230 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3231 3232 The format which is used for the sparse matrix input, is equivalent to a 3233 row-major ordering.. i.e for the following matrix, the input data expected is 3234 as shown: 3235 3236 1 0 0 3237 2 0 3 P0 3238 ------- 3239 4 5 6 P1 3240 3241 Process0 [P0]: rows_owned=[0,1] 3242 i = {0,1,3} [size = nrow+1 = 2+1] 3243 j = {0,0,2} [size = nz = 6] 3244 v = {1,2,3} [size = nz = 6] 3245 3246 Process1 [P1]: rows_owned=[2] 3247 i = {0,3} [size = nrow+1 = 1+1] 3248 j = {0,1,2} [size = nz = 6] 3249 v = {4,5,6} [size = nz = 6] 3250 3251 The column indices for each row MUST be sorted. 3252 3253 .keywords: matrix, aij, compressed row, sparse, parallel 3254 3255 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3256 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3257 @*/ 3258 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3259 { 3260 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 3261 3262 PetscFunctionBegin; 3263 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3264 if (f) { 3265 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 3266 } 3267 PetscFunctionReturn(0); 3268 } 3269 3270 #undef __FUNCT__ 3271 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3272 /*@C 3273 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3274 (the default parallel PETSc format). For good matrix assembly performance 3275 the user should preallocate the matrix storage by setting the parameters 3276 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3277 performance can be increased by more than a factor of 50. 3278 3279 Collective on MPI_Comm 3280 3281 Input Parameters: 3282 + A - the matrix 3283 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3284 (same value is used for all local rows) 3285 . d_nnz - array containing the number of nonzeros in the various rows of the 3286 DIAGONAL portion of the local submatrix (possibly different for each row) 3287 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3288 The size of this array is equal to the number of local rows, i.e 'm'. 3289 You must leave room for the diagonal entry even if it is zero. 3290 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3291 submatrix (same value is used for all local rows). 3292 - o_nnz - array containing the number of nonzeros in the various rows of the 3293 OFF-DIAGONAL portion of the local submatrix (possibly different for 3294 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3295 structure. The size of this array is equal to the number 3296 of local rows, i.e 'm'. 3297 3298 If the *_nnz parameter is given then the *_nz parameter is ignored 3299 3300 The AIJ format (also called the Yale sparse matrix format or 3301 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3302 storage. The stored row and column indices begin with zero. See the users manual for details. 3303 3304 The parallel matrix is partitioned such that the first m0 rows belong to 3305 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3306 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3307 3308 The DIAGONAL portion of the local submatrix of a processor can be defined 3309 as the submatrix which is obtained by extraction the part corresponding 3310 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 3311 first row that belongs to the processor, and r2 is the last row belonging 3312 to the this processor. This is a square mxm matrix. The remaining portion 3313 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 3314 3315 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3316 3317 You can call MatGetInfo() to get information on how effective the preallocation was; 3318 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3319 You can also run with the option -info and look for messages with the string 3320 malloc in them to see if additional memory allocation was needed. 3321 3322 Example usage: 3323 3324 Consider the following 8x8 matrix with 34 non-zero values, that is 3325 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3326 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3327 as follows: 3328 3329 .vb 3330 1 2 0 | 0 3 0 | 0 4 3331 Proc0 0 5 6 | 7 0 0 | 8 0 3332 9 0 10 | 11 0 0 | 12 0 3333 ------------------------------------- 3334 13 0 14 | 15 16 17 | 0 0 3335 Proc1 0 18 0 | 19 20 21 | 0 0 3336 0 0 0 | 22 23 0 | 24 0 3337 ------------------------------------- 3338 Proc2 25 26 27 | 0 0 28 | 29 0 3339 30 0 0 | 31 32 33 | 0 34 3340 .ve 3341 3342 This can be represented as a collection of submatrices as: 3343 3344 .vb 3345 A B C 3346 D E F 3347 G H I 3348 .ve 3349 3350 Where the submatrices A,B,C are owned by proc0, D,E,F are 3351 owned by proc1, G,H,I are owned by proc2. 3352 3353 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3354 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3355 The 'M','N' parameters are 8,8, and have the same values on all procs. 3356 3357 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3358 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3359 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3360 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3361 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3362 matrix, ans [DF] as another SeqAIJ matrix. 3363 3364 When d_nz, o_nz parameters are specified, d_nz storage elements are 3365 allocated for every row of the local diagonal submatrix, and o_nz 3366 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3367 One way to choose d_nz and o_nz is to use the max nonzerors per local 3368 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3369 In this case, the values of d_nz,o_nz are: 3370 .vb 3371 proc0 : dnz = 2, o_nz = 2 3372 proc1 : dnz = 3, o_nz = 2 3373 proc2 : dnz = 1, o_nz = 4 3374 .ve 3375 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3376 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3377 for proc3. i.e we are using 12+15+10=37 storage locations to store 3378 34 values. 3379 3380 When d_nnz, o_nnz parameters are specified, the storage is specified 3381 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3382 In the above case the values for d_nnz,o_nnz are: 3383 .vb 3384 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3385 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3386 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3387 .ve 3388 Here the space allocated is sum of all the above values i.e 34, and 3389 hence pre-allocation is perfect. 3390 3391 Level: intermediate 3392 3393 .keywords: matrix, aij, compressed row, sparse, parallel 3394 3395 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3396 MPIAIJ, MatGetInfo() 3397 @*/ 3398 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3399 { 3400 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3401 3402 PetscFunctionBegin; 3403 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3404 if (f) { 3405 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3406 } 3407 PetscFunctionReturn(0); 3408 } 3409 3410 #undef __FUNCT__ 3411 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3412 /*@ 3413 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3414 CSR format the local rows. 3415 3416 Collective on MPI_Comm 3417 3418 Input Parameters: 3419 + comm - MPI communicator 3420 . m - number of local rows (Cannot be PETSC_DECIDE) 3421 . n - This value should be the same as the local size used in creating the 3422 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3423 calculated if N is given) For square matrices n is almost always m. 3424 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3425 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3426 . i - row indices 3427 . j - column indices 3428 - a - matrix values 3429 3430 Output Parameter: 3431 . mat - the matrix 3432 3433 Level: intermediate 3434 3435 Notes: 3436 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3437 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3438 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3439 3440 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3441 3442 The format which is used for the sparse matrix input, is equivalent to a 3443 row-major ordering.. i.e for the following matrix, the input data expected is 3444 as shown: 3445 3446 1 0 0 3447 2 0 3 P0 3448 ------- 3449 4 5 6 P1 3450 3451 Process0 [P0]: rows_owned=[0,1] 3452 i = {0,1,3} [size = nrow+1 = 2+1] 3453 j = {0,0,2} [size = nz = 6] 3454 v = {1,2,3} [size = nz = 6] 3455 3456 Process1 [P1]: rows_owned=[2] 3457 i = {0,3} [size = nrow+1 = 1+1] 3458 j = {0,1,2} [size = nz = 6] 3459 v = {4,5,6} [size = nz = 6] 3460 3461 .keywords: matrix, aij, compressed row, sparse, parallel 3462 3463 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3464 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3465 @*/ 3466 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3467 { 3468 PetscErrorCode ierr; 3469 3470 PetscFunctionBegin; 3471 if (i[0]) { 3472 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3473 } 3474 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3475 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3476 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3477 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3478 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3479 PetscFunctionReturn(0); 3480 } 3481 3482 #undef __FUNCT__ 3483 #define __FUNCT__ "MatCreateMPIAIJ" 3484 /*@C 3485 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3486 (the default parallel PETSc format). For good matrix assembly performance 3487 the user should preallocate the matrix storage by setting the parameters 3488 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3489 performance can be increased by more than a factor of 50. 3490 3491 Collective on MPI_Comm 3492 3493 Input Parameters: 3494 + comm - MPI communicator 3495 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3496 This value should be the same as the local size used in creating the 3497 y vector for the matrix-vector product y = Ax. 3498 . n - This value should be the same as the local size used in creating the 3499 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3500 calculated if N is given) For square matrices n is almost always m. 3501 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3502 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3503 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3504 (same value is used for all local rows) 3505 . d_nnz - array containing the number of nonzeros in the various rows of the 3506 DIAGONAL portion of the local submatrix (possibly different for each row) 3507 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3508 The size of this array is equal to the number of local rows, i.e 'm'. 3509 You must leave room for the diagonal entry even if it is zero. 3510 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3511 submatrix (same value is used for all local rows). 3512 - o_nnz - array containing the number of nonzeros in the various rows of the 3513 OFF-DIAGONAL portion of the local submatrix (possibly different for 3514 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3515 structure. The size of this array is equal to the number 3516 of local rows, i.e 'm'. 3517 3518 Output Parameter: 3519 . A - the matrix 3520 3521 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3522 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3523 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3524 3525 Notes: 3526 If the *_nnz parameter is given then the *_nz parameter is ignored 3527 3528 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3529 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3530 storage requirements for this matrix. 3531 3532 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3533 processor than it must be used on all processors that share the object for 3534 that argument. 3535 3536 The user MUST specify either the local or global matrix dimensions 3537 (possibly both). 3538 3539 The parallel matrix is partitioned across processors such that the 3540 first m0 rows belong to process 0, the next m1 rows belong to 3541 process 1, the next m2 rows belong to process 2 etc.. where 3542 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3543 values corresponding to [m x N] submatrix. 3544 3545 The columns are logically partitioned with the n0 columns belonging 3546 to 0th partition, the next n1 columns belonging to the next 3547 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 3548 3549 The DIAGONAL portion of the local submatrix on any given processor 3550 is the submatrix corresponding to the rows and columns m,n 3551 corresponding to the given processor. i.e diagonal matrix on 3552 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3553 etc. The remaining portion of the local submatrix [m x (N-n)] 3554 constitute the OFF-DIAGONAL portion. The example below better 3555 illustrates this concept. 3556 3557 For a square global matrix we define each processor's diagonal portion 3558 to be its local rows and the corresponding columns (a square submatrix); 3559 each processor's off-diagonal portion encompasses the remainder of the 3560 local matrix (a rectangular submatrix). 3561 3562 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3563 3564 When calling this routine with a single process communicator, a matrix of 3565 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3566 type of communicator, use the construction mechanism: 3567 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 3568 3569 By default, this format uses inodes (identical nodes) when possible. 3570 We search for consecutive rows with the same nonzero structure, thereby 3571 reusing matrix information to achieve increased efficiency. 3572 3573 Options Database Keys: 3574 + -mat_no_inode - Do not use inodes 3575 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3576 - -mat_aij_oneindex - Internally use indexing starting at 1 3577 rather than 0. Note that when calling MatSetValues(), 3578 the user still MUST index entries starting at 0! 3579 3580 3581 Example usage: 3582 3583 Consider the following 8x8 matrix with 34 non-zero values, that is 3584 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3585 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3586 as follows: 3587 3588 .vb 3589 1 2 0 | 0 3 0 | 0 4 3590 Proc0 0 5 6 | 7 0 0 | 8 0 3591 9 0 10 | 11 0 0 | 12 0 3592 ------------------------------------- 3593 13 0 14 | 15 16 17 | 0 0 3594 Proc1 0 18 0 | 19 20 21 | 0 0 3595 0 0 0 | 22 23 0 | 24 0 3596 ------------------------------------- 3597 Proc2 25 26 27 | 0 0 28 | 29 0 3598 30 0 0 | 31 32 33 | 0 34 3599 .ve 3600 3601 This can be represented as a collection of submatrices as: 3602 3603 .vb 3604 A B C 3605 D E F 3606 G H I 3607 .ve 3608 3609 Where the submatrices A,B,C are owned by proc0, D,E,F are 3610 owned by proc1, G,H,I are owned by proc2. 3611 3612 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3613 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3614 The 'M','N' parameters are 8,8, and have the same values on all procs. 3615 3616 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3617 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3618 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3619 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3620 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3621 matrix, ans [DF] as another SeqAIJ matrix. 3622 3623 When d_nz, o_nz parameters are specified, d_nz storage elements are 3624 allocated for every row of the local diagonal submatrix, and o_nz 3625 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3626 One way to choose d_nz and o_nz is to use the max nonzerors per local 3627 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3628 In this case, the values of d_nz,o_nz are: 3629 .vb 3630 proc0 : dnz = 2, o_nz = 2 3631 proc1 : dnz = 3, o_nz = 2 3632 proc2 : dnz = 1, o_nz = 4 3633 .ve 3634 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3635 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3636 for proc3. i.e we are using 12+15+10=37 storage locations to store 3637 34 values. 3638 3639 When d_nnz, o_nnz parameters are specified, the storage is specified 3640 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3641 In the above case the values for d_nnz,o_nnz are: 3642 .vb 3643 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3644 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3645 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3646 .ve 3647 Here the space allocated is sum of all the above values i.e 34, and 3648 hence pre-allocation is perfect. 3649 3650 Level: intermediate 3651 3652 .keywords: matrix, aij, compressed row, sparse, parallel 3653 3654 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3655 MPIAIJ, MatCreateMPIAIJWithArrays() 3656 @*/ 3657 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3658 { 3659 PetscErrorCode ierr; 3660 PetscMPIInt size; 3661 3662 PetscFunctionBegin; 3663 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3664 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3665 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3666 if (size > 1) { 3667 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3668 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3669 } else { 3670 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3671 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3672 } 3673 PetscFunctionReturn(0); 3674 } 3675 3676 #undef __FUNCT__ 3677 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3678 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3679 { 3680 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3681 3682 PetscFunctionBegin; 3683 *Ad = a->A; 3684 *Ao = a->B; 3685 *colmap = a->garray; 3686 PetscFunctionReturn(0); 3687 } 3688 3689 #undef __FUNCT__ 3690 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3691 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3692 { 3693 PetscErrorCode ierr; 3694 PetscInt i; 3695 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3696 3697 PetscFunctionBegin; 3698 if (coloring->ctype == IS_COLORING_GLOBAL) { 3699 ISColoringValue *allcolors,*colors; 3700 ISColoring ocoloring; 3701 3702 /* set coloring for diagonal portion */ 3703 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3704 3705 /* set coloring for off-diagonal portion */ 3706 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3707 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3708 for (i=0; i<a->B->cmap->n; i++) { 3709 colors[i] = allcolors[a->garray[i]]; 3710 } 3711 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3712 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3713 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3714 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3715 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3716 ISColoringValue *colors; 3717 PetscInt *larray; 3718 ISColoring ocoloring; 3719 3720 /* set coloring for diagonal portion */ 3721 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3722 for (i=0; i<a->A->cmap->n; i++) { 3723 larray[i] = i + A->cmap->rstart; 3724 } 3725 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3726 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3727 for (i=0; i<a->A->cmap->n; i++) { 3728 colors[i] = coloring->colors[larray[i]]; 3729 } 3730 ierr = PetscFree(larray);CHKERRQ(ierr); 3731 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3732 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3733 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3734 3735 /* set coloring for off-diagonal portion */ 3736 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3737 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3738 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3739 for (i=0; i<a->B->cmap->n; i++) { 3740 colors[i] = coloring->colors[larray[i]]; 3741 } 3742 ierr = PetscFree(larray);CHKERRQ(ierr); 3743 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3744 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3745 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3746 } else { 3747 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3748 } 3749 3750 PetscFunctionReturn(0); 3751 } 3752 3753 #if defined(PETSC_HAVE_ADIC) 3754 #undef __FUNCT__ 3755 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3756 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3757 { 3758 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3759 PetscErrorCode ierr; 3760 3761 PetscFunctionBegin; 3762 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3763 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3764 PetscFunctionReturn(0); 3765 } 3766 #endif 3767 3768 #undef __FUNCT__ 3769 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3770 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3771 { 3772 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3773 PetscErrorCode ierr; 3774 3775 PetscFunctionBegin; 3776 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3777 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3778 PetscFunctionReturn(0); 3779 } 3780 3781 #undef __FUNCT__ 3782 #define __FUNCT__ "MatMerge" 3783 /*@ 3784 MatMerge - Creates a single large PETSc matrix by concatinating sequential 3785 matrices from each processor 3786 3787 Collective on MPI_Comm 3788 3789 Input Parameters: 3790 + comm - the communicators the parallel matrix will live on 3791 . inmat - the input sequential matrices 3792 . n - number of local columns (or PETSC_DECIDE) 3793 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3794 3795 Output Parameter: 3796 . outmat - the parallel matrix generated 3797 3798 Level: advanced 3799 3800 Notes: The number of columns of the matrix in EACH processor MUST be the same. 3801 3802 @*/ 3803 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3804 { 3805 PetscErrorCode ierr; 3806 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3807 PetscInt *indx; 3808 PetscScalar *values; 3809 3810 PetscFunctionBegin; 3811 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3812 if (scall == MAT_INITIAL_MATRIX){ 3813 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3814 if (n == PETSC_DECIDE){ 3815 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3816 } 3817 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3818 rstart -= m; 3819 3820 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3821 for (i=0;i<m;i++) { 3822 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3823 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3824 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3825 } 3826 /* This routine will ONLY return MPIAIJ type matrix */ 3827 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3828 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3829 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3830 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3831 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3832 3833 } else if (scall == MAT_REUSE_MATRIX){ 3834 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3835 } else { 3836 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3837 } 3838 3839 for (i=0;i<m;i++) { 3840 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3841 Ii = i + rstart; 3842 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3843 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3844 } 3845 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3846 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3847 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3848 3849 PetscFunctionReturn(0); 3850 } 3851 3852 #undef __FUNCT__ 3853 #define __FUNCT__ "MatFileSplit" 3854 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3855 { 3856 PetscErrorCode ierr; 3857 PetscMPIInt rank; 3858 PetscInt m,N,i,rstart,nnz; 3859 size_t len; 3860 const PetscInt *indx; 3861 PetscViewer out; 3862 char *name; 3863 Mat B; 3864 const PetscScalar *values; 3865 3866 PetscFunctionBegin; 3867 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3868 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3869 /* Should this be the type of the diagonal block of A? */ 3870 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3871 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3872 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3873 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3874 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3875 for (i=0;i<m;i++) { 3876 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3877 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3878 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3879 } 3880 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3881 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3882 3883 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 3884 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3885 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3886 sprintf(name,"%s.%d",outfile,rank); 3887 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3888 ierr = PetscFree(name); 3889 ierr = MatView(B,out);CHKERRQ(ierr); 3890 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3891 ierr = MatDestroy(B);CHKERRQ(ierr); 3892 PetscFunctionReturn(0); 3893 } 3894 3895 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3896 #undef __FUNCT__ 3897 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3898 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3899 { 3900 PetscErrorCode ierr; 3901 Mat_Merge_SeqsToMPI *merge; 3902 PetscContainer container; 3903 3904 PetscFunctionBegin; 3905 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3906 if (container) { 3907 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3908 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3909 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3910 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3911 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3912 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3913 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3914 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3915 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3916 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3917 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3918 ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr); 3919 3920 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 3921 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3922 } 3923 ierr = PetscFree(merge);CHKERRQ(ierr); 3924 3925 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3926 PetscFunctionReturn(0); 3927 } 3928 3929 #include "../src/mat/utils/freespace.h" 3930 #include "petscbt.h" 3931 3932 #undef __FUNCT__ 3933 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 3934 /*@C 3935 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 3936 matrices from each processor 3937 3938 Collective on MPI_Comm 3939 3940 Input Parameters: 3941 + comm - the communicators the parallel matrix will live on 3942 . seqmat - the input sequential matrices 3943 . m - number of local rows (or PETSC_DECIDE) 3944 . n - number of local columns (or PETSC_DECIDE) 3945 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3946 3947 Output Parameter: 3948 . mpimat - the parallel matrix generated 3949 3950 Level: advanced 3951 3952 Notes: 3953 The dimensions of the sequential matrix in each processor MUST be the same. 3954 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 3955 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 3956 @*/ 3957 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 3958 { 3959 PetscErrorCode ierr; 3960 MPI_Comm comm=((PetscObject)mpimat)->comm; 3961 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3962 PetscMPIInt size,rank,taga,*len_s; 3963 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 3964 PetscInt proc,m; 3965 PetscInt **buf_ri,**buf_rj; 3966 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3967 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3968 MPI_Request *s_waits,*r_waits; 3969 MPI_Status *status; 3970 MatScalar *aa=a->a; 3971 MatScalar **abuf_r,*ba_i; 3972 Mat_Merge_SeqsToMPI *merge; 3973 PetscContainer container; 3974 3975 PetscFunctionBegin; 3976 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3977 3978 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3979 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3980 3981 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3982 if (container) { 3983 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3984 } 3985 bi = merge->bi; 3986 bj = merge->bj; 3987 buf_ri = merge->buf_ri; 3988 buf_rj = merge->buf_rj; 3989 3990 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3991 owners = merge->rowmap.range; 3992 len_s = merge->len_s; 3993 3994 /* send and recv matrix values */ 3995 /*-----------------------------*/ 3996 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3997 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3998 3999 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4000 for (proc=0,k=0; proc<size; proc++){ 4001 if (!len_s[proc]) continue; 4002 i = owners[proc]; 4003 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4004 k++; 4005 } 4006 4007 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4008 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4009 ierr = PetscFree(status);CHKERRQ(ierr); 4010 4011 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4012 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4013 4014 /* insert mat values of mpimat */ 4015 /*----------------------------*/ 4016 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4017 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 4018 nextrow = buf_ri_k + merge->nrecv; 4019 nextai = nextrow + merge->nrecv; 4020 4021 for (k=0; k<merge->nrecv; k++){ 4022 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4023 nrows = *(buf_ri_k[k]); 4024 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4025 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4026 } 4027 4028 /* set values of ba */ 4029 m = merge->rowmap.n; 4030 for (i=0; i<m; i++) { 4031 arow = owners[rank] + i; 4032 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4033 bnzi = bi[i+1] - bi[i]; 4034 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4035 4036 /* add local non-zero vals of this proc's seqmat into ba */ 4037 anzi = ai[arow+1] - ai[arow]; 4038 aj = a->j + ai[arow]; 4039 aa = a->a + ai[arow]; 4040 nextaj = 0; 4041 for (j=0; nextaj<anzi; j++){ 4042 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4043 ba_i[j] += aa[nextaj++]; 4044 } 4045 } 4046 4047 /* add received vals into ba */ 4048 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4049 /* i-th row */ 4050 if (i == *nextrow[k]) { 4051 anzi = *(nextai[k]+1) - *nextai[k]; 4052 aj = buf_rj[k] + *(nextai[k]); 4053 aa = abuf_r[k] + *(nextai[k]); 4054 nextaj = 0; 4055 for (j=0; nextaj<anzi; j++){ 4056 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4057 ba_i[j] += aa[nextaj++]; 4058 } 4059 } 4060 nextrow[k]++; nextai[k]++; 4061 } 4062 } 4063 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4064 } 4065 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4066 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4067 4068 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4069 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4070 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 4071 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4072 PetscFunctionReturn(0); 4073 } 4074 4075 #undef __FUNCT__ 4076 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4077 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4078 { 4079 PetscErrorCode ierr; 4080 Mat B_mpi; 4081 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4082 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4083 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4084 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4085 PetscInt len,proc,*dnz,*onz; 4086 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4087 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4088 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4089 MPI_Status *status; 4090 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4091 PetscBT lnkbt; 4092 Mat_Merge_SeqsToMPI *merge; 4093 PetscContainer container; 4094 4095 PetscFunctionBegin; 4096 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4097 4098 /* make sure it is a PETSc comm */ 4099 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4100 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4101 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4102 4103 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4104 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4105 4106 /* determine row ownership */ 4107 /*---------------------------------------------------------*/ 4108 ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 4109 merge->rowmap.n = m; 4110 merge->rowmap.N = M; 4111 merge->rowmap.bs = 1; 4112 ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr); 4113 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4114 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4115 4116 m = merge->rowmap.n; 4117 M = merge->rowmap.N; 4118 owners = merge->rowmap.range; 4119 4120 /* determine the number of messages to send, their lengths */ 4121 /*---------------------------------------------------------*/ 4122 len_s = merge->len_s; 4123 4124 len = 0; /* length of buf_si[] */ 4125 merge->nsend = 0; 4126 for (proc=0; proc<size; proc++){ 4127 len_si[proc] = 0; 4128 if (proc == rank){ 4129 len_s[proc] = 0; 4130 } else { 4131 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4132 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4133 } 4134 if (len_s[proc]) { 4135 merge->nsend++; 4136 nrows = 0; 4137 for (i=owners[proc]; i<owners[proc+1]; i++){ 4138 if (ai[i+1] > ai[i]) nrows++; 4139 } 4140 len_si[proc] = 2*(nrows+1); 4141 len += len_si[proc]; 4142 } 4143 } 4144 4145 /* determine the number and length of messages to receive for ij-structure */ 4146 /*-------------------------------------------------------------------------*/ 4147 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4148 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4149 4150 /* post the Irecv of j-structure */ 4151 /*-------------------------------*/ 4152 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4153 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4154 4155 /* post the Isend of j-structure */ 4156 /*--------------------------------*/ 4157 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 4158 sj_waits = si_waits + merge->nsend; 4159 4160 for (proc=0, k=0; proc<size; proc++){ 4161 if (!len_s[proc]) continue; 4162 i = owners[proc]; 4163 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4164 k++; 4165 } 4166 4167 /* receives and sends of j-structure are complete */ 4168 /*------------------------------------------------*/ 4169 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4170 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4171 4172 /* send and recv i-structure */ 4173 /*---------------------------*/ 4174 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4175 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4176 4177 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4178 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4179 for (proc=0,k=0; proc<size; proc++){ 4180 if (!len_s[proc]) continue; 4181 /* form outgoing message for i-structure: 4182 buf_si[0]: nrows to be sent 4183 [1:nrows]: row index (global) 4184 [nrows+1:2*nrows+1]: i-structure index 4185 */ 4186 /*-------------------------------------------*/ 4187 nrows = len_si[proc]/2 - 1; 4188 buf_si_i = buf_si + nrows+1; 4189 buf_si[0] = nrows; 4190 buf_si_i[0] = 0; 4191 nrows = 0; 4192 for (i=owners[proc]; i<owners[proc+1]; i++){ 4193 anzi = ai[i+1] - ai[i]; 4194 if (anzi) { 4195 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4196 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4197 nrows++; 4198 } 4199 } 4200 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4201 k++; 4202 buf_si += len_si[proc]; 4203 } 4204 4205 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4206 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4207 4208 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4209 for (i=0; i<merge->nrecv; i++){ 4210 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4211 } 4212 4213 ierr = PetscFree(len_si);CHKERRQ(ierr); 4214 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4215 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4216 ierr = PetscFree(si_waits);CHKERRQ(ierr); 4217 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4218 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4219 ierr = PetscFree(status);CHKERRQ(ierr); 4220 4221 /* compute a local seq matrix in each processor */ 4222 /*----------------------------------------------*/ 4223 /* allocate bi array and free space for accumulating nonzero column info */ 4224 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4225 bi[0] = 0; 4226 4227 /* create and initialize a linked list */ 4228 nlnk = N+1; 4229 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4230 4231 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4232 len = 0; 4233 len = ai[owners[rank+1]] - ai[owners[rank]]; 4234 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4235 current_space = free_space; 4236 4237 /* determine symbolic info for each local row */ 4238 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 4239 nextrow = buf_ri_k + merge->nrecv; 4240 nextai = nextrow + merge->nrecv; 4241 for (k=0; k<merge->nrecv; k++){ 4242 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4243 nrows = *buf_ri_k[k]; 4244 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4245 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4246 } 4247 4248 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4249 len = 0; 4250 for (i=0;i<m;i++) { 4251 bnzi = 0; 4252 /* add local non-zero cols of this proc's seqmat into lnk */ 4253 arow = owners[rank] + i; 4254 anzi = ai[arow+1] - ai[arow]; 4255 aj = a->j + ai[arow]; 4256 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4257 bnzi += nlnk; 4258 /* add received col data into lnk */ 4259 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4260 if (i == *nextrow[k]) { /* i-th row */ 4261 anzi = *(nextai[k]+1) - *nextai[k]; 4262 aj = buf_rj[k] + *nextai[k]; 4263 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4264 bnzi += nlnk; 4265 nextrow[k]++; nextai[k]++; 4266 } 4267 } 4268 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4269 4270 /* if free space is not available, make more free space */ 4271 if (current_space->local_remaining<bnzi) { 4272 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4273 nspacedouble++; 4274 } 4275 /* copy data into free space, then initialize lnk */ 4276 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4277 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4278 4279 current_space->array += bnzi; 4280 current_space->local_used += bnzi; 4281 current_space->local_remaining -= bnzi; 4282 4283 bi[i+1] = bi[i] + bnzi; 4284 } 4285 4286 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 4287 4288 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4289 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4290 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4291 4292 /* create symbolic parallel matrix B_mpi */ 4293 /*---------------------------------------*/ 4294 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4295 if (n==PETSC_DECIDE) { 4296 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4297 } else { 4298 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4299 } 4300 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4301 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4302 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4303 4304 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4305 B_mpi->assembled = PETSC_FALSE; 4306 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4307 merge->bi = bi; 4308 merge->bj = bj; 4309 merge->buf_ri = buf_ri; 4310 merge->buf_rj = buf_rj; 4311 merge->coi = PETSC_NULL; 4312 merge->coj = PETSC_NULL; 4313 merge->owners_co = PETSC_NULL; 4314 4315 /* attach the supporting struct to B_mpi for reuse */ 4316 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4317 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4318 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4319 *mpimat = B_mpi; 4320 4321 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4322 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4323 PetscFunctionReturn(0); 4324 } 4325 4326 #undef __FUNCT__ 4327 #define __FUNCT__ "MatMerge_SeqsToMPI" 4328 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4329 { 4330 PetscErrorCode ierr; 4331 4332 PetscFunctionBegin; 4333 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4334 if (scall == MAT_INITIAL_MATRIX){ 4335 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4336 } 4337 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4338 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4339 PetscFunctionReturn(0); 4340 } 4341 4342 #undef __FUNCT__ 4343 #define __FUNCT__ "MatGetLocalMat" 4344 /*@ 4345 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4346 4347 Not Collective 4348 4349 Input Parameters: 4350 + A - the matrix 4351 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4352 4353 Output Parameter: 4354 . A_loc - the local sequential matrix generated 4355 4356 Level: developer 4357 4358 @*/ 4359 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4360 { 4361 PetscErrorCode ierr; 4362 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4363 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4364 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4365 MatScalar *aa=a->a,*ba=b->a,*cam; 4366 PetscScalar *ca; 4367 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4368 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4369 4370 PetscFunctionBegin; 4371 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4372 if (scall == MAT_INITIAL_MATRIX){ 4373 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4374 ci[0] = 0; 4375 for (i=0; i<am; i++){ 4376 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4377 } 4378 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4379 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4380 k = 0; 4381 for (i=0; i<am; i++) { 4382 ncols_o = bi[i+1] - bi[i]; 4383 ncols_d = ai[i+1] - ai[i]; 4384 /* off-diagonal portion of A */ 4385 for (jo=0; jo<ncols_o; jo++) { 4386 col = cmap[*bj]; 4387 if (col >= cstart) break; 4388 cj[k] = col; bj++; 4389 ca[k++] = *ba++; 4390 } 4391 /* diagonal portion of A */ 4392 for (j=0; j<ncols_d; j++) { 4393 cj[k] = cstart + *aj++; 4394 ca[k++] = *aa++; 4395 } 4396 /* off-diagonal portion of A */ 4397 for (j=jo; j<ncols_o; j++) { 4398 cj[k] = cmap[*bj++]; 4399 ca[k++] = *ba++; 4400 } 4401 } 4402 /* put together the new matrix */ 4403 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4404 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4405 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4406 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4407 mat->free_a = PETSC_TRUE; 4408 mat->free_ij = PETSC_TRUE; 4409 mat->nonew = 0; 4410 } else if (scall == MAT_REUSE_MATRIX){ 4411 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4412 ci = mat->i; cj = mat->j; cam = mat->a; 4413 for (i=0; i<am; i++) { 4414 /* off-diagonal portion of A */ 4415 ncols_o = bi[i+1] - bi[i]; 4416 for (jo=0; jo<ncols_o; jo++) { 4417 col = cmap[*bj]; 4418 if (col >= cstart) break; 4419 *cam++ = *ba++; bj++; 4420 } 4421 /* diagonal portion of A */ 4422 ncols_d = ai[i+1] - ai[i]; 4423 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4424 /* off-diagonal portion of A */ 4425 for (j=jo; j<ncols_o; j++) { 4426 *cam++ = *ba++; bj++; 4427 } 4428 } 4429 } else { 4430 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4431 } 4432 4433 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4434 PetscFunctionReturn(0); 4435 } 4436 4437 #undef __FUNCT__ 4438 #define __FUNCT__ "MatGetLocalMatCondensed" 4439 /*@C 4440 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4441 4442 Not Collective 4443 4444 Input Parameters: 4445 + A - the matrix 4446 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4447 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4448 4449 Output Parameter: 4450 . A_loc - the local sequential matrix generated 4451 4452 Level: developer 4453 4454 @*/ 4455 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4456 { 4457 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4458 PetscErrorCode ierr; 4459 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4460 IS isrowa,iscola; 4461 Mat *aloc; 4462 4463 PetscFunctionBegin; 4464 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4465 if (!row){ 4466 start = A->rmap->rstart; end = A->rmap->rend; 4467 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4468 } else { 4469 isrowa = *row; 4470 } 4471 if (!col){ 4472 start = A->cmap->rstart; 4473 cmap = a->garray; 4474 nzA = a->A->cmap->n; 4475 nzB = a->B->cmap->n; 4476 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4477 ncols = 0; 4478 for (i=0; i<nzB; i++) { 4479 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4480 else break; 4481 } 4482 imark = i; 4483 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4484 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4485 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4486 ierr = PetscFree(idx);CHKERRQ(ierr); 4487 } else { 4488 iscola = *col; 4489 } 4490 if (scall != MAT_INITIAL_MATRIX){ 4491 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4492 aloc[0] = *A_loc; 4493 } 4494 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4495 *A_loc = aloc[0]; 4496 ierr = PetscFree(aloc);CHKERRQ(ierr); 4497 if (!row){ 4498 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4499 } 4500 if (!col){ 4501 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4502 } 4503 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4504 PetscFunctionReturn(0); 4505 } 4506 4507 #undef __FUNCT__ 4508 #define __FUNCT__ "MatGetBrowsOfAcols" 4509 /*@C 4510 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4511 4512 Collective on Mat 4513 4514 Input Parameters: 4515 + A,B - the matrices in mpiaij format 4516 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4517 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4518 4519 Output Parameter: 4520 + rowb, colb - index sets of rows and columns of B to extract 4521 . brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows 4522 - B_seq - the sequential matrix generated 4523 4524 Level: developer 4525 4526 @*/ 4527 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4528 { 4529 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4530 PetscErrorCode ierr; 4531 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4532 IS isrowb,iscolb; 4533 Mat *bseq; 4534 4535 PetscFunctionBegin; 4536 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4537 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4538 } 4539 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4540 4541 if (scall == MAT_INITIAL_MATRIX){ 4542 start = A->cmap->rstart; 4543 cmap = a->garray; 4544 nzA = a->A->cmap->n; 4545 nzB = a->B->cmap->n; 4546 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4547 ncols = 0; 4548 for (i=0; i<nzB; i++) { /* row < local row index */ 4549 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4550 else break; 4551 } 4552 imark = i; 4553 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4554 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4555 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4556 ierr = PetscFree(idx);CHKERRQ(ierr); 4557 *brstart = imark; 4558 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4559 } else { 4560 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4561 isrowb = *rowb; iscolb = *colb; 4562 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4563 bseq[0] = *B_seq; 4564 } 4565 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4566 *B_seq = bseq[0]; 4567 ierr = PetscFree(bseq);CHKERRQ(ierr); 4568 if (!rowb){ 4569 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4570 } else { 4571 *rowb = isrowb; 4572 } 4573 if (!colb){ 4574 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4575 } else { 4576 *colb = iscolb; 4577 } 4578 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4579 PetscFunctionReturn(0); 4580 } 4581 4582 #undef __FUNCT__ 4583 #define __FUNCT__ "MatGetBrowsOfAoCols" 4584 /*@C 4585 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4586 of the OFF-DIAGONAL portion of local A 4587 4588 Collective on Mat 4589 4590 Input Parameters: 4591 + A,B - the matrices in mpiaij format 4592 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4593 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4594 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4595 4596 Output Parameter: 4597 + B_oth - the sequential matrix generated 4598 4599 Level: developer 4600 4601 @*/ 4602 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth) 4603 { 4604 VecScatter_MPI_General *gen_to,*gen_from; 4605 PetscErrorCode ierr; 4606 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4607 Mat_SeqAIJ *b_oth; 4608 VecScatter ctx=a->Mvctx; 4609 MPI_Comm comm=((PetscObject)ctx)->comm; 4610 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4611 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4612 PetscScalar *rvalues,*svalues; 4613 MatScalar *b_otha,*bufa,*bufA; 4614 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4615 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4616 MPI_Status *sstatus,rstatus; 4617 PetscMPIInt jj; 4618 PetscInt *cols,sbs,rbs; 4619 PetscScalar *vals; 4620 4621 PetscFunctionBegin; 4622 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4623 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4624 } 4625 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4626 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4627 4628 gen_to = (VecScatter_MPI_General*)ctx->todata; 4629 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4630 rvalues = gen_from->values; /* holds the length of receiving row */ 4631 svalues = gen_to->values; /* holds the length of sending row */ 4632 nrecvs = gen_from->n; 4633 nsends = gen_to->n; 4634 4635 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4636 srow = gen_to->indices; /* local row index to be sent */ 4637 sstarts = gen_to->starts; 4638 sprocs = gen_to->procs; 4639 sstatus = gen_to->sstatus; 4640 sbs = gen_to->bs; 4641 rstarts = gen_from->starts; 4642 rprocs = gen_from->procs; 4643 rbs = gen_from->bs; 4644 4645 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4646 if (scall == MAT_INITIAL_MATRIX){ 4647 /* i-array */ 4648 /*---------*/ 4649 /* post receives */ 4650 for (i=0; i<nrecvs; i++){ 4651 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4652 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4653 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4654 } 4655 4656 /* pack the outgoing message */ 4657 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 4658 rstartsj = sstartsj + nsends +1; 4659 sstartsj[0] = 0; rstartsj[0] = 0; 4660 len = 0; /* total length of j or a array to be sent */ 4661 k = 0; 4662 for (i=0; i<nsends; i++){ 4663 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4664 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4665 for (j=0; j<nrows; j++) { 4666 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4667 for (l=0; l<sbs; l++){ 4668 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4669 rowlen[j*sbs+l] = ncols; 4670 len += ncols; 4671 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4672 } 4673 k++; 4674 } 4675 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4676 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4677 } 4678 /* recvs and sends of i-array are completed */ 4679 i = nrecvs; 4680 while (i--) { 4681 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4682 } 4683 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4684 4685 /* allocate buffers for sending j and a arrays */ 4686 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4687 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4688 4689 /* create i-array of B_oth */ 4690 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4691 b_othi[0] = 0; 4692 len = 0; /* total length of j or a array to be received */ 4693 k = 0; 4694 for (i=0; i<nrecvs; i++){ 4695 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4696 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 4697 for (j=0; j<nrows; j++) { 4698 b_othi[k+1] = b_othi[k] + rowlen[j]; 4699 len += rowlen[j]; k++; 4700 } 4701 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4702 } 4703 4704 /* allocate space for j and a arrrays of B_oth */ 4705 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4706 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 4707 4708 /* j-array */ 4709 /*---------*/ 4710 /* post receives of j-array */ 4711 for (i=0; i<nrecvs; i++){ 4712 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4713 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4714 } 4715 4716 /* pack the outgoing message j-array */ 4717 k = 0; 4718 for (i=0; i<nsends; i++){ 4719 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4720 bufJ = bufj+sstartsj[i]; 4721 for (j=0; j<nrows; j++) { 4722 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4723 for (ll=0; ll<sbs; ll++){ 4724 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4725 for (l=0; l<ncols; l++){ 4726 *bufJ++ = cols[l]; 4727 } 4728 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4729 } 4730 } 4731 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4732 } 4733 4734 /* recvs and sends of j-array are completed */ 4735 i = nrecvs; 4736 while (i--) { 4737 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4738 } 4739 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4740 } else if (scall == MAT_REUSE_MATRIX){ 4741 sstartsj = *startsj; 4742 rstartsj = sstartsj + nsends +1; 4743 bufa = *bufa_ptr; 4744 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4745 b_otha = b_oth->a; 4746 } else { 4747 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 4748 } 4749 4750 /* a-array */ 4751 /*---------*/ 4752 /* post receives of a-array */ 4753 for (i=0; i<nrecvs; i++){ 4754 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4755 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4756 } 4757 4758 /* pack the outgoing message a-array */ 4759 k = 0; 4760 for (i=0; i<nsends; i++){ 4761 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4762 bufA = bufa+sstartsj[i]; 4763 for (j=0; j<nrows; j++) { 4764 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4765 for (ll=0; ll<sbs; ll++){ 4766 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4767 for (l=0; l<ncols; l++){ 4768 *bufA++ = vals[l]; 4769 } 4770 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4771 } 4772 } 4773 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4774 } 4775 /* recvs and sends of a-array are completed */ 4776 i = nrecvs; 4777 while (i--) { 4778 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4779 } 4780 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4781 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4782 4783 if (scall == MAT_INITIAL_MATRIX){ 4784 /* put together the new matrix */ 4785 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4786 4787 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4788 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4789 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4790 b_oth->free_a = PETSC_TRUE; 4791 b_oth->free_ij = PETSC_TRUE; 4792 b_oth->nonew = 0; 4793 4794 ierr = PetscFree(bufj);CHKERRQ(ierr); 4795 if (!startsj || !bufa_ptr){ 4796 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 4797 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4798 } else { 4799 *startsj = sstartsj; 4800 *bufa_ptr = bufa; 4801 } 4802 } 4803 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4804 PetscFunctionReturn(0); 4805 } 4806 4807 #undef __FUNCT__ 4808 #define __FUNCT__ "MatGetCommunicationStructs" 4809 /*@C 4810 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4811 4812 Not Collective 4813 4814 Input Parameters: 4815 . A - The matrix in mpiaij format 4816 4817 Output Parameter: 4818 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4819 . colmap - A map from global column index to local index into lvec 4820 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4821 4822 Level: developer 4823 4824 @*/ 4825 #if defined (PETSC_USE_CTABLE) 4826 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4827 #else 4828 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4829 #endif 4830 { 4831 Mat_MPIAIJ *a; 4832 4833 PetscFunctionBegin; 4834 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 4835 PetscValidPointer(lvec, 2) 4836 PetscValidPointer(colmap, 3) 4837 PetscValidPointer(multScatter, 4) 4838 a = (Mat_MPIAIJ *) A->data; 4839 if (lvec) *lvec = a->lvec; 4840 if (colmap) *colmap = a->colmap; 4841 if (multScatter) *multScatter = a->Mvctx; 4842 PetscFunctionReturn(0); 4843 } 4844 4845 EXTERN_C_BEGIN 4846 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*); 4847 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*); 4848 EXTERN_C_END 4849 4850 #include "../src/mat/impls/dense/mpi/mpidense.h" 4851 4852 #undef __FUNCT__ 4853 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4854 /* 4855 Computes (B'*A')' since computing B*A directly is untenable 4856 4857 n p p 4858 ( ) ( ) ( ) 4859 m ( A ) * n ( B ) = m ( C ) 4860 ( ) ( ) ( ) 4861 4862 */ 4863 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4864 { 4865 PetscErrorCode ierr; 4866 Mat At,Bt,Ct; 4867 4868 PetscFunctionBegin; 4869 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4870 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4871 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4872 ierr = MatDestroy(At);CHKERRQ(ierr); 4873 ierr = MatDestroy(Bt);CHKERRQ(ierr); 4874 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4875 ierr = MatDestroy(Ct);CHKERRQ(ierr); 4876 PetscFunctionReturn(0); 4877 } 4878 4879 #undef __FUNCT__ 4880 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4881 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4882 { 4883 PetscErrorCode ierr; 4884 PetscInt m=A->rmap->n,n=B->cmap->n; 4885 Mat Cmat; 4886 4887 PetscFunctionBegin; 4888 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 4889 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 4890 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4891 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 4892 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 4893 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4894 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4895 *C = Cmat; 4896 PetscFunctionReturn(0); 4897 } 4898 4899 /* ----------------------------------------------------------------*/ 4900 #undef __FUNCT__ 4901 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 4902 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 4903 { 4904 PetscErrorCode ierr; 4905 4906 PetscFunctionBegin; 4907 if (scall == MAT_INITIAL_MATRIX){ 4908 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 4909 } 4910 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 4911 PetscFunctionReturn(0); 4912 } 4913 4914 EXTERN_C_BEGIN 4915 #if defined(PETSC_HAVE_MUMPS) 4916 extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*); 4917 #endif 4918 #if defined(PETSC_HAVE_PASTIX) 4919 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 4920 #endif 4921 #if defined(PETSC_HAVE_SUPERLU_DIST) 4922 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 4923 #endif 4924 #if defined(PETSC_HAVE_SPOOLES) 4925 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 4926 #endif 4927 EXTERN_C_END 4928 4929 /*MC 4930 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 4931 4932 Options Database Keys: 4933 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 4934 4935 Level: beginner 4936 4937 .seealso: MatCreateMPIAIJ() 4938 M*/ 4939 4940 EXTERN_C_BEGIN 4941 #undef __FUNCT__ 4942 #define __FUNCT__ "MatCreate_MPIAIJ" 4943 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 4944 { 4945 Mat_MPIAIJ *b; 4946 PetscErrorCode ierr; 4947 PetscMPIInt size; 4948 4949 PetscFunctionBegin; 4950 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 4951 4952 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 4953 B->data = (void*)b; 4954 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 4955 B->rmap->bs = 1; 4956 B->assembled = PETSC_FALSE; 4957 B->mapping = 0; 4958 4959 B->insertmode = NOT_SET_VALUES; 4960 b->size = size; 4961 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 4962 4963 /* build cache for off array entries formed */ 4964 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 4965 b->donotstash = PETSC_FALSE; 4966 b->colmap = 0; 4967 b->garray = 0; 4968 b->roworiented = PETSC_TRUE; 4969 4970 /* stuff used for matrix vector multiply */ 4971 b->lvec = PETSC_NULL; 4972 b->Mvctx = PETSC_NULL; 4973 4974 /* stuff for MatGetRow() */ 4975 b->rowindices = 0; 4976 b->rowvalues = 0; 4977 b->getrowactive = PETSC_FALSE; 4978 4979 #if defined(PETSC_HAVE_SPOOLES) 4980 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_spooles_C", 4981 "MatGetFactor_mpiaij_spooles", 4982 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 4983 #endif 4984 #if defined(PETSC_HAVE_MUMPS) 4985 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_mumps_C", 4986 "MatGetFactor_mpiaij_mumps", 4987 MatGetFactor_mpiaij_mumps);CHKERRQ(ierr); 4988 #endif 4989 #if defined(PETSC_HAVE_PASTIX) 4990 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_pastix_C", 4991 "MatGetFactor_mpiaij_pastix", 4992 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 4993 #endif 4994 #if defined(PETSC_HAVE_SUPERLU_DIST) 4995 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_superlu_dist_C", 4996 "MatGetFactor_mpiaij_superlu_dist", 4997 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 4998 #endif 4999 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5000 "MatStoreValues_MPIAIJ", 5001 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5002 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5003 "MatRetrieveValues_MPIAIJ", 5004 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5005 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5006 "MatGetDiagonalBlock_MPIAIJ", 5007 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5008 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5009 "MatIsTranspose_MPIAIJ", 5010 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5011 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5012 "MatMPIAIJSetPreallocation_MPIAIJ", 5013 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5014 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5015 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5016 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5017 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5018 "MatDiagonalScaleLocal_MPIAIJ", 5019 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5020 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C", 5021 "MatConvert_MPIAIJ_MPICSRPERM", 5022 MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr); 5023 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C", 5024 "MatConvert_MPIAIJ_MPICRL", 5025 MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr); 5026 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5027 "MatMatMult_MPIDense_MPIAIJ", 5028 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5029 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5030 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5031 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5032 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5033 "MatMatMultNumeric_MPIDense_MPIAIJ", 5034 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5035 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5036 PetscFunctionReturn(0); 5037 } 5038 EXTERN_C_END 5039 5040 #undef __FUNCT__ 5041 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5042 /*@ 5043 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5044 and "off-diagonal" part of the matrix in CSR format. 5045 5046 Collective on MPI_Comm 5047 5048 Input Parameters: 5049 + comm - MPI communicator 5050 . m - number of local rows (Cannot be PETSC_DECIDE) 5051 . n - This value should be the same as the local size used in creating the 5052 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5053 calculated if N is given) For square matrices n is almost always m. 5054 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5055 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5056 . i - row indices for "diagonal" portion of matrix 5057 . j - column indices 5058 . a - matrix values 5059 . oi - row indices for "off-diagonal" portion of matrix 5060 . oj - column indices 5061 - oa - matrix values 5062 5063 Output Parameter: 5064 . mat - the matrix 5065 5066 Level: advanced 5067 5068 Notes: 5069 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 5070 5071 The i and j indices are 0 based 5072 5073 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5074 5075 5076 .keywords: matrix, aij, compressed row, sparse, parallel 5077 5078 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5079 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5080 @*/ 5081 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5082 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5083 { 5084 PetscErrorCode ierr; 5085 Mat_MPIAIJ *maij; 5086 5087 PetscFunctionBegin; 5088 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5089 if (i[0]) { 5090 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5091 } 5092 if (oi[0]) { 5093 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5094 } 5095 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5096 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5097 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5098 maij = (Mat_MPIAIJ*) (*mat)->data; 5099 maij->donotstash = PETSC_TRUE; 5100 (*mat)->preallocated = PETSC_TRUE; 5101 5102 ierr = PetscMapSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5103 ierr = PetscMapSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5104 ierr = PetscMapSetUp((*mat)->rmap);CHKERRQ(ierr); 5105 ierr = PetscMapSetUp((*mat)->cmap);CHKERRQ(ierr); 5106 5107 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5108 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5109 5110 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5111 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5112 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5113 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5114 5115 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5116 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5117 PetscFunctionReturn(0); 5118 } 5119 5120 /* 5121 Special version for direct calls from Fortran 5122 */ 5123 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5124 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5125 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5126 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5127 #endif 5128 5129 /* Change these macros so can be used in void function */ 5130 #undef CHKERRQ 5131 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5132 #undef SETERRQ2 5133 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5134 #undef SETERRQ 5135 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5136 5137 EXTERN_C_BEGIN 5138 #undef __FUNCT__ 5139 #define __FUNCT__ "matsetvaluesmpiaij_" 5140 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5141 { 5142 Mat mat = *mmat; 5143 PetscInt m = *mm, n = *mn; 5144 InsertMode addv = *maddv; 5145 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5146 PetscScalar value; 5147 PetscErrorCode ierr; 5148 5149 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5150 if (mat->insertmode == NOT_SET_VALUES) { 5151 mat->insertmode = addv; 5152 } 5153 #if defined(PETSC_USE_DEBUG) 5154 else if (mat->insertmode != addv) { 5155 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5156 } 5157 #endif 5158 { 5159 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5160 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5161 PetscTruth roworiented = aij->roworiented; 5162 5163 /* Some Variables required in the macro */ 5164 Mat A = aij->A; 5165 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5166 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5167 MatScalar *aa = a->a; 5168 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5169 Mat B = aij->B; 5170 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5171 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5172 MatScalar *ba = b->a; 5173 5174 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5175 PetscInt nonew = a->nonew; 5176 MatScalar *ap1,*ap2; 5177 5178 PetscFunctionBegin; 5179 for (i=0; i<m; i++) { 5180 if (im[i] < 0) continue; 5181 #if defined(PETSC_USE_DEBUG) 5182 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5183 #endif 5184 if (im[i] >= rstart && im[i] < rend) { 5185 row = im[i] - rstart; 5186 lastcol1 = -1; 5187 rp1 = aj + ai[row]; 5188 ap1 = aa + ai[row]; 5189 rmax1 = aimax[row]; 5190 nrow1 = ailen[row]; 5191 low1 = 0; 5192 high1 = nrow1; 5193 lastcol2 = -1; 5194 rp2 = bj + bi[row]; 5195 ap2 = ba + bi[row]; 5196 rmax2 = bimax[row]; 5197 nrow2 = bilen[row]; 5198 low2 = 0; 5199 high2 = nrow2; 5200 5201 for (j=0; j<n; j++) { 5202 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5203 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5204 if (in[j] >= cstart && in[j] < cend){ 5205 col = in[j] - cstart; 5206 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5207 } else if (in[j] < 0) continue; 5208 #if defined(PETSC_USE_DEBUG) 5209 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 5210 #endif 5211 else { 5212 if (mat->was_assembled) { 5213 if (!aij->colmap) { 5214 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5215 } 5216 #if defined (PETSC_USE_CTABLE) 5217 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5218 col--; 5219 #else 5220 col = aij->colmap[in[j]] - 1; 5221 #endif 5222 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5223 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5224 col = in[j]; 5225 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5226 B = aij->B; 5227 b = (Mat_SeqAIJ*)B->data; 5228 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5229 rp2 = bj + bi[row]; 5230 ap2 = ba + bi[row]; 5231 rmax2 = bimax[row]; 5232 nrow2 = bilen[row]; 5233 low2 = 0; 5234 high2 = nrow2; 5235 bm = aij->B->rmap->n; 5236 ba = b->a; 5237 } 5238 } else col = in[j]; 5239 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5240 } 5241 } 5242 } else { 5243 if (!aij->donotstash) { 5244 if (roworiented) { 5245 if (ignorezeroentries && v[i*n] == 0.0) continue; 5246 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 5247 } else { 5248 if (ignorezeroentries && v[i] == 0.0) continue; 5249 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 5250 } 5251 } 5252 } 5253 }} 5254 PetscFunctionReturnVoid(); 5255 } 5256 EXTERN_C_END 5257 5258