#define PETSCMAT_DLL

#include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "../src/inline/spops.h"

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscTruth     aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
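      /*
         A small worked example of the counts computed below (illustrative only):
         suppose rstart=3, rend=6 and a local row has global columns {0,2,4,7}.
         The received row length in dlens[] is 4; columns 0 and 2 lie to the left
         of the diagonal block, so ld[] = 2; columns 0, 2 and 7 fall outside
         [rstart,rend), so olens[] = 3; after the later "dlens[i] -= olens[i]"
         adjustment the diagonal-block count used for preallocation is 1 (column 4).
      */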
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];  ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                    ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];            ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}

/*
    Local utility routine that creates a mapping from the global column
    number to the local number in the off-diagonal
    part of the local storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
    a slightly higher hash table cost; without it it is not scalable (each processor
    has an order N integer array but is fast to access).
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}


#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
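/*
   Note on the input (an observation drawn from the copy logic below, not a formal
   API statement): v[] is assumed to hold every stored value of the local row in
   order of increasing global column index, i.e. the off-diagonal entries to the
   left of this process's diagonal block, then the diagonal-block entries, then
   the off-diagonal entries to the right, matching the preallocated nonzero
   pattern exactly.
*/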
{ 279 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 280 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 281 PetscErrorCode ierr; 282 PetscInt l,*garray = mat->garray,diag; 283 284 PetscFunctionBegin; 285 /* code only works for square matrices A */ 286 287 /* find size of row to the left of the diagonal part */ 288 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 289 row = row - diag; 290 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 291 if (garray[b->j[b->i[row]+l]] > diag) break; 292 } 293 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 294 295 /* diagonal part */ 296 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 297 298 /* right of diagonal part */ 299 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 300 PetscFunctionReturn(0); 301 } 302 303 #undef __FUNCT__ 304 #define __FUNCT__ "MatSetValues_MPIAIJ" 305 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 306 { 307 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 308 PetscScalar value; 309 PetscErrorCode ierr; 310 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 311 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 312 PetscTruth roworiented = aij->roworiented; 313 314 /* Some Variables required in the macro */ 315 Mat A = aij->A; 316 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 317 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 318 MatScalar *aa = a->a; 319 PetscTruth ignorezeroentries = a->ignorezeroentries; 320 Mat B = aij->B; 321 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 322 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 323 MatScalar *ba = b->a; 324 325 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 326 PetscInt nonew = a->nonew; 327 MatScalar *ap1,*ap2; 328 329 PetscFunctionBegin; 330 for (i=0; i<m; i++) { 331 if (im[i] < 0) continue; 332 #if defined(PETSC_USE_DEBUG) 333 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 334 #endif 335 if (im[i] >= rstart && im[i] < rend) { 336 row = im[i] - rstart; 337 lastcol1 = -1; 338 rp1 = aj + ai[row]; 339 ap1 = aa + ai[row]; 340 rmax1 = aimax[row]; 341 nrow1 = ailen[row]; 342 low1 = 0; 343 high1 = nrow1; 344 lastcol2 = -1; 345 rp2 = bj + bi[row]; 346 ap2 = ba + bi[row]; 347 rmax2 = bimax[row]; 348 nrow2 = bilen[row]; 349 low2 = 0; 350 high2 = nrow2; 351 352 for (j=0; j<n; j++) { 353 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 354 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 355 if (in[j] >= cstart && in[j] < cend){ 356 col = in[j] - cstart; 357 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 358 } else if (in[j] < 0) continue; 359 #if defined(PETSC_USE_DEBUG) 360 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 361 #endif 362 else { 363 if (mat->was_assembled) { 364 if (!aij->colmap) { 365 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 366 } 367 #if defined (PETSC_USE_CTABLE) 368 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 369 col--; 370 #else 371 col = aij->colmap[in[j]] - 1; 372 #endif 373 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 374 ierr 
= DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 375 col = in[j]; 376 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 377 B = aij->B; 378 b = (Mat_SeqAIJ*)B->data; 379 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 380 rp2 = bj + bi[row]; 381 ap2 = ba + bi[row]; 382 rmax2 = bimax[row]; 383 nrow2 = bilen[row]; 384 low2 = 0; 385 high2 = nrow2; 386 bm = aij->B->rmap->n; 387 ba = b->a; 388 } 389 } else col = in[j]; 390 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 391 } 392 } 393 } else { 394 if (!aij->donotstash) { 395 if (roworiented) { 396 if (ignorezeroentries && v[i*n] == 0.0) continue; 397 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 398 } else { 399 if (ignorezeroentries && v[i] == 0.0) continue; 400 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 401 } 402 } 403 } 404 } 405 PetscFunctionReturn(0); 406 } 407 408 #undef __FUNCT__ 409 #define __FUNCT__ "MatGetValues_MPIAIJ" 410 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 411 { 412 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 413 PetscErrorCode ierr; 414 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 415 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 416 417 PetscFunctionBegin; 418 for (i=0; i<m; i++) { 419 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 420 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 421 if (idxm[i] >= rstart && idxm[i] < rend) { 422 row = idxm[i] - rstart; 423 for (j=0; j<n; j++) { 424 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 425 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 426 if (idxn[j] >= cstart && idxn[j] < cend){ 427 col = idxn[j] - cstart; 428 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 429 } else { 430 if (!aij->colmap) { 431 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 432 } 433 #if defined (PETSC_USE_CTABLE) 434 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 435 col --; 436 #else 437 col = aij->colmap[idxn[j]] - 1; 438 #endif 439 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 440 else { 441 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 442 } 443 } 444 } 445 } else { 446 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 447 } 448 } 449 PetscFunctionReturn(0); 450 } 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 455 { 456 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 457 PetscErrorCode ierr; 458 PetscInt nstash,reallocs; 459 InsertMode addv; 460 461 PetscFunctionBegin; 462 if (aij->donotstash) { 463 PetscFunctionReturn(0); 464 } 465 466 /* make sure all processors are either in INSERTMODE or ADDMODE */ 467 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 468 if (addv == (ADD_VALUES|INSERT_VALUES)) { 469 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 470 } 471 mat->insertmode = addv; /* in case this processor had no cache */ 472 473 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 474 ierr = 
MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ *)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap->range;
  PetscInt
*nprocs,j,idx,nsends,row; 568 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 569 PetscInt *rvalues,count,base,slen,*source; 570 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 571 MPI_Comm comm = ((PetscObject)A)->comm; 572 MPI_Request *send_waits,*recv_waits; 573 MPI_Status recv_status,*send_status; 574 #if defined(PETSC_DEBUG) 575 PetscTruth found = PETSC_FALSE; 576 #endif 577 578 PetscFunctionBegin; 579 /* first count number of contributors to each processor */ 580 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 581 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 582 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 583 j = 0; 584 for (i=0; i<N; i++) { 585 if (lastidx > (idx = rows[i])) j = 0; 586 lastidx = idx; 587 for (; j<size; j++) { 588 if (idx >= owners[j] && idx < owners[j+1]) { 589 nprocs[2*j]++; 590 nprocs[2*j+1] = 1; 591 owner[i] = j; 592 #if defined(PETSC_DEBUG) 593 found = PETSC_TRUE; 594 #endif 595 break; 596 } 597 } 598 #if defined(PETSC_DEBUG) 599 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 600 found = PETSC_FALSE; 601 #endif 602 } 603 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 604 605 /* inform other processors of number of messages and max length*/ 606 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 607 608 /* post receives: */ 609 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 610 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 611 for (i=0; i<nrecvs; i++) { 612 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 613 } 614 615 /* do sends: 616 1) starts[i] gives the starting index in svalues for stuff going to 617 the ith processor 618 */ 619 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 620 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 621 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 622 starts[0] = 0; 623 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 624 for (i=0; i<N; i++) { 625 svalues[starts[owner[i]]++] = rows[i]; 626 } 627 628 starts[0] = 0; 629 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 630 count = 0; 631 for (i=0; i<size; i++) { 632 if (nprocs[2*i+1]) { 633 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 634 } 635 } 636 ierr = PetscFree(starts);CHKERRQ(ierr); 637 638 base = owners[rank]; 639 640 /* wait on receives */ 641 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 642 source = lens + nrecvs; 643 count = nrecvs; slen = 0; 644 while (count) { 645 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 646 /* unpack receives into our local space */ 647 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 648 source[imdex] = recv_status.MPI_SOURCE; 649 lens[imdex] = n; 650 slen += n; 651 count--; 652 } 653 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 654 655 /* move the data into the send scatter */ 656 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 657 count = 0; 658 for (i=0; i<nrecvs; i++) { 659 values = rvalues + i*nmax; 660 for (j=0; j<lens[i]; j++) { 661 lrows[count++] = values[j] - base; 662 } 663 } 664 ierr = PetscFree(rvalues);CHKERRQ(ierr); 665 ierr = PetscFree(lens);CHKERRQ(ierr); 666 ierr = PetscFree(owner);CHKERRQ(ierr); 667 ierr = PetscFree(nprocs);CHKERRQ(ierr); 668 
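  /*
     At this point lrows[] holds the local indices (global row number minus the
     ownership start) of every row this process owns and must zero, collected from
     the requests of all processes. A typical caller-side use is imposing Dirichlet
     boundary conditions; a sketch only, where 'A', 'nbc' and 'bcrows' are
     hypothetical caller variables:

       ierr = MatZeroRows(A,nbc,bcrows,1.0);CHKERRQ(ierr);

     The global row numbers listed in bcrows[] need not be owned by the calling
     process; the exchange above routes each one to its owner.
  */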
669 /* actually zap the local rows */ 670 /* 671 Zero the required rows. If the "diagonal block" of the matrix 672 is square and the user wishes to set the diagonal we use separate 673 code so that MatSetValues() is not called for each diagonal allocating 674 new memory, thus calling lots of mallocs and slowing things down. 675 676 Contributed by: Matthew Knepley 677 */ 678 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 679 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 680 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 681 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 682 } else if (diag != 0.0) { 683 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 684 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 685 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 686 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 687 } 688 for (i = 0; i < slen; i++) { 689 row = lrows[i] + rstart; 690 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 691 } 692 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 693 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 694 } else { 695 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 696 } 697 ierr = PetscFree(lrows);CHKERRQ(ierr); 698 699 /* wait on sends */ 700 if (nsends) { 701 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 702 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 703 ierr = PetscFree(send_status);CHKERRQ(ierr); 704 } 705 ierr = PetscFree(send_waits);CHKERRQ(ierr); 706 ierr = PetscFree(svalues);CHKERRQ(ierr); 707 708 PetscFunctionReturn(0); 709 } 710 711 #undef __FUNCT__ 712 #define __FUNCT__ "MatMult_MPIAIJ" 713 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 714 { 715 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 716 PetscErrorCode ierr; 717 PetscInt nt; 718 719 PetscFunctionBegin; 720 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 721 if (nt != A->cmap->n) { 722 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 723 } 724 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 725 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 726 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 727 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 728 PetscFunctionReturn(0); 729 } 730 731 #undef __FUNCT__ 732 #define __FUNCT__ "MatMultAdd_MPIAIJ" 733 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 734 { 735 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 736 PetscErrorCode ierr; 737 738 PetscFunctionBegin; 739 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 740 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 741 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 742 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 743 PetscFunctionReturn(0); 744 } 745 746 #undef __FUNCT__ 747 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 748 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 749 { 750 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 751 PetscErrorCode ierr; 752 PetscTruth merged; 753 754 PetscFunctionBegin; 755 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 756 /* do nondiagonal part */ 757 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 758 if (!merged) { 759 /* send it on 
its way */ 760 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 761 /* do local part */ 762 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 763 /* receive remote parts: note this assumes the values are not actually */ 764 /* added in yy until the next line, */ 765 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 766 } else { 767 /* do local part */ 768 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 769 /* send it on its way */ 770 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 771 /* values actually were received in the Begin() but we need to call this nop */ 772 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 773 } 774 PetscFunctionReturn(0); 775 } 776 777 EXTERN_C_BEGIN 778 #undef __FUNCT__ 779 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 780 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 781 { 782 MPI_Comm comm; 783 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 784 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 785 IS Me,Notme; 786 PetscErrorCode ierr; 787 PetscInt M,N,first,last,*notme,i; 788 PetscMPIInt size; 789 790 PetscFunctionBegin; 791 792 /* Easy test: symmetric diagonal block */ 793 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 794 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 795 if (!*f) PetscFunctionReturn(0); 796 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 797 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 798 if (size == 1) PetscFunctionReturn(0); 799 800 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 801 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 802 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 803 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 804 for (i=0; i<first; i++) notme[i] = i; 805 for (i=last; i<M; i++) notme[i-last+first] = i; 806 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 807 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 808 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 809 Aoff = Aoffs[0]; 810 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 811 Boff = Boffs[0]; 812 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 813 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 814 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 815 ierr = ISDestroy(Me);CHKERRQ(ierr); 816 ierr = ISDestroy(Notme);CHKERRQ(ierr); 817 818 PetscFunctionReturn(0); 819 } 820 EXTERN_C_END 821 822 #undef __FUNCT__ 823 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 824 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 825 { 826 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 827 PetscErrorCode ierr; 828 829 PetscFunctionBegin; 830 /* do nondiagonal part */ 831 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 832 /* send it on its way */ 833 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 834 /* do local part */ 835 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 836 /* receive remote parts */ 837 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 838 PetscFunctionReturn(0); 839 } 840 841 /* 842 This only works correctly for square matrices where the subblock A->A is the 843 diagonal block 844 */ 845 #undef 
__FUNCT__ 846 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 847 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 848 { 849 PetscErrorCode ierr; 850 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 851 852 PetscFunctionBegin; 853 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 854 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) { 855 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 856 } 857 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 858 PetscFunctionReturn(0); 859 } 860 861 #undef __FUNCT__ 862 #define __FUNCT__ "MatScale_MPIAIJ" 863 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 864 { 865 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 866 PetscErrorCode ierr; 867 868 PetscFunctionBegin; 869 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 870 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 871 PetscFunctionReturn(0); 872 } 873 874 #undef __FUNCT__ 875 #define __FUNCT__ "MatDestroy_MPIAIJ" 876 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 877 { 878 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 879 PetscErrorCode ierr; 880 881 PetscFunctionBegin; 882 #if defined(PETSC_USE_LOG) 883 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 884 #endif 885 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 886 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 887 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 888 #if defined (PETSC_USE_CTABLE) 889 if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);} 890 #else 891 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 892 #endif 893 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 894 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 895 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 896 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 897 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 898 ierr = PetscFree(aij);CHKERRQ(ierr); 899 900 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 901 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 902 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 903 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 904 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 905 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 906 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 907 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 908 PetscFunctionReturn(0); 909 } 910 911 #undef __FUNCT__ 912 #define __FUNCT__ "MatView_MPIAIJ_Binary" 913 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 914 { 915 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 916 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 917 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 918 PetscErrorCode ierr; 919 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 920 int fd; 921 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 922 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 923 PetscScalar *column_values; 924 925 PetscFunctionBegin; 926 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 927 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 928 nz = 
A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the largest number of nonzeros on any process */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (;
j<B->i[i+1]; j++) { 1014 column_values[cnt++] = B->a[j]; 1015 } 1016 } 1017 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1018 1019 /* store the column values to the file */ 1020 if (!rank) { 1021 MPI_Status status; 1022 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1023 for (i=1; i<size; i++) { 1024 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1025 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1026 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1027 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1028 } 1029 } else { 1030 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1031 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1032 } 1033 ierr = PetscFree(column_values);CHKERRQ(ierr); 1034 PetscFunctionReturn(0); 1035 } 1036 1037 #undef __FUNCT__ 1038 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1039 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1040 { 1041 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1042 PetscErrorCode ierr; 1043 PetscMPIInt rank = aij->rank,size = aij->size; 1044 PetscTruth isdraw,iascii,isbinary; 1045 PetscViewer sviewer; 1046 PetscViewerFormat format; 1047 1048 PetscFunctionBegin; 1049 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1050 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1051 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1052 if (iascii) { 1053 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1054 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1055 MatInfo info; 1056 PetscTruth inodes; 1057 1058 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1059 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1060 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1061 if (!inodes) { 1062 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1063 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1064 } else { 1065 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1066 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1067 } 1068 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1069 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1070 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1071 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1072 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1073 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1074 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1075 PetscFunctionReturn(0); 1076 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1077 PetscInt inodecount,inodelimit,*inodes; 1078 ierr = 
MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1079 if (inodes) { 1080 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1081 } else { 1082 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1083 } 1084 PetscFunctionReturn(0); 1085 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1086 PetscFunctionReturn(0); 1087 } 1088 } else if (isbinary) { 1089 if (size == 1) { 1090 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1091 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1092 } else { 1093 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1094 } 1095 PetscFunctionReturn(0); 1096 } else if (isdraw) { 1097 PetscDraw draw; 1098 PetscTruth isnull; 1099 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1100 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1101 } 1102 1103 if (size == 1) { 1104 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1105 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1106 } else { 1107 /* assemble the entire matrix onto first processor. */ 1108 Mat A; 1109 Mat_SeqAIJ *Aloc; 1110 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1111 MatScalar *a; 1112 1113 if (mat->rmap->N > 1024) { 1114 PetscTruth flg = PETSC_FALSE; 1115 1116 ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1117 if (!flg) { 1118 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1119 } 1120 } 1121 1122 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1123 if (!rank) { 1124 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1125 } else { 1126 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1127 } 1128 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1129 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1130 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1131 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1132 1133 /* copy over the A part */ 1134 Aloc = (Mat_SeqAIJ*)aij->A->data; 1135 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1136 row = mat->rmap->rstart; 1137 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1138 for (i=0; i<m; i++) { 1139 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1140 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1141 } 1142 aj = Aloc->j; 1143 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1144 1145 /* copy over the B part */ 1146 Aloc = (Mat_SeqAIJ*)aij->B->data; 1147 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1148 row = mat->rmap->rstart; 1149 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1150 ct = cols; 1151 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1152 for (i=0; i<m; i++) { 1153 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1154 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1155 } 1156 ierr = PetscFree(ct);CHKERRQ(ierr); 1157 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1158 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1159 /* 1160 Everyone has to call to draw the matrix since the 
graphics waits are 1161 synchronized across all processors that share the PetscDraw object 1162 */ 1163 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1164 if (!rank) { 1165 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1166 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1167 } 1168 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1169 ierr = MatDestroy(A);CHKERRQ(ierr); 1170 } 1171 PetscFunctionReturn(0); 1172 } 1173 1174 #undef __FUNCT__ 1175 #define __FUNCT__ "MatView_MPIAIJ" 1176 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1177 { 1178 PetscErrorCode ierr; 1179 PetscTruth iascii,isdraw,issocket,isbinary; 1180 1181 PetscFunctionBegin; 1182 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1183 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1184 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1185 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1186 if (iascii || isdraw || isbinary || issocket) { 1187 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1188 } else { 1189 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1190 } 1191 PetscFunctionReturn(0); 1192 } 1193 1194 #undef __FUNCT__ 1195 #define __FUNCT__ "MatRelax_MPIAIJ" 1196 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1197 { 1198 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1199 PetscErrorCode ierr; 1200 Vec bb1; 1201 1202 PetscFunctionBegin; 1203 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1204 1205 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1206 if (flag & SOR_ZERO_INITIAL_GUESS) { 1207 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1208 its--; 1209 } 1210 1211 while (its--) { 1212 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1213 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1214 1215 /* update rhs: bb1 = bb - B*x */ 1216 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1217 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1218 1219 /* local sweep */ 1220 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 1221 } 1222 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1223 if (flag & SOR_ZERO_INITIAL_GUESS) { 1224 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1225 its--; 1226 } 1227 while (its--) { 1228 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1229 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1230 1231 /* update rhs: bb1 = bb - B*x */ 1232 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1233 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1234 1235 /* local sweep */ 1236 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1237 } 1238 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1239 if (flag & SOR_ZERO_INITIAL_GUESS) { 1240 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1241 its--; 1242 } 1243 while 
(its--) { 1244 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1245 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1246 1247 /* update rhs: bb1 = bb - B*x */ 1248 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1249 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1250 1251 /* local sweep */ 1252 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1253 } 1254 } else { 1255 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1256 } 1257 1258 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1259 PetscFunctionReturn(0); 1260 } 1261 1262 #undef __FUNCT__ 1263 #define __FUNCT__ "MatPermute_MPIAIJ" 1264 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1265 { 1266 MPI_Comm comm,pcomm; 1267 PetscInt first,local_size,nrows; 1268 const PetscInt *rows; 1269 PetscMPIInt size; 1270 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1271 PetscErrorCode ierr; 1272 1273 PetscFunctionBegin; 1274 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1275 /* make a collective version of 'rowp' */ 1276 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1277 if (pcomm==comm) { 1278 crowp = rowp; 1279 } else { 1280 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1281 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1282 ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr); 1283 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1284 } 1285 /* collect the global row permutation and invert it */ 1286 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1287 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1288 if (pcomm!=comm) { 1289 ierr = ISDestroy(crowp);CHKERRQ(ierr); 1290 } 1291 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1292 /* get the local target indices */ 1293 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1294 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 1295 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1296 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr); 1297 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1298 ierr = ISDestroy(irowp);CHKERRQ(ierr); 1299 /* the column permutation is so much easier; 1300 make a local version of 'colp' and invert it */ 1301 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 1302 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 1303 if (size==1) { 1304 lcolp = colp; 1305 } else { 1306 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 1307 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 1308 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr); 1309 } 1310 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1311 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1312 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1313 if (size>1) { 1314 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 1315 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 1316 } 1317 /* now we just get the submatrix */ 1318 ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1319 /* clean up */ 1320 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 1321 ierr = ISDestroy(icolp);CHKERRQ(ierr); 1322 PetscFunctionReturn(0); 1323 } 1324 1325 #undef __FUNCT__ 1326 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1327 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1328 { 1329 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1330 Mat A = mat->A,B = mat->B; 1331 PetscErrorCode 
ierr; 1332 PetscReal isend[5],irecv[5]; 1333 1334 PetscFunctionBegin; 1335 info->block_size = 1.0; 1336 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1337 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1338 isend[3] = info->memory; isend[4] = info->mallocs; 1339 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1340 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1341 isend[3] += info->memory; isend[4] += info->mallocs; 1342 if (flag == MAT_LOCAL) { 1343 info->nz_used = isend[0]; 1344 info->nz_allocated = isend[1]; 1345 info->nz_unneeded = isend[2]; 1346 info->memory = isend[3]; 1347 info->mallocs = isend[4]; 1348 } else if (flag == MAT_GLOBAL_MAX) { 1349 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1350 info->nz_used = irecv[0]; 1351 info->nz_allocated = irecv[1]; 1352 info->nz_unneeded = irecv[2]; 1353 info->memory = irecv[3]; 1354 info->mallocs = irecv[4]; 1355 } else if (flag == MAT_GLOBAL_SUM) { 1356 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1357 info->nz_used = irecv[0]; 1358 info->nz_allocated = irecv[1]; 1359 info->nz_unneeded = irecv[2]; 1360 info->memory = irecv[3]; 1361 info->mallocs = irecv[4]; 1362 } 1363 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1364 info->fill_ratio_needed = 0; 1365 info->factor_mallocs = 0; 1366 1367 PetscFunctionReturn(0); 1368 } 1369 1370 #undef __FUNCT__ 1371 #define __FUNCT__ "MatSetOption_MPIAIJ" 1372 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg) 1373 { 1374 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1375 PetscErrorCode ierr; 1376 1377 PetscFunctionBegin; 1378 switch (op) { 1379 case MAT_NEW_NONZERO_LOCATIONS: 1380 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1381 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1382 case MAT_KEEP_ZEROED_ROWS: 1383 case MAT_NEW_NONZERO_LOCATION_ERR: 1384 case MAT_USE_INODES: 1385 case MAT_IGNORE_ZERO_ENTRIES: 1386 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1387 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1388 break; 1389 case MAT_ROW_ORIENTED: 1390 a->roworiented = flg; 1391 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1392 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1393 break; 1394 case MAT_NEW_DIAGONALS: 1395 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1396 break; 1397 case MAT_IGNORE_OFF_PROC_ENTRIES: 1398 a->donotstash = PETSC_TRUE; 1399 break; 1400 case MAT_SYMMETRIC: 1401 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1402 break; 1403 case MAT_STRUCTURALLY_SYMMETRIC: 1404 case MAT_HERMITIAN: 1405 case MAT_SYMMETRY_ETERNAL: 1406 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1407 break; 1408 default: 1409 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1410 } 1411 PetscFunctionReturn(0); 1412 } 1413 1414 #undef __FUNCT__ 1415 #define __FUNCT__ "MatGetRow_MPIAIJ" 1416 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1417 { 1418 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1419 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1420 PetscErrorCode ierr; 1421 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1422 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1423 PetscInt *cmap,*idx_p; 1424 1425 PetscFunctionBegin; 1426 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1427 mat->getrowactive = PETSC_TRUE; 1428 1429 if (!mat->rowvalues && 
(idx || v)) { 1430 /* 1431 allocate enough space to hold information from the longest row. 1432 */ 1433 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1434 PetscInt max = 1,tmp; 1435 for (i=0; i<matin->rmap->n; i++) { 1436 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1437 if (max < tmp) { max = tmp; } 1438 } 1439 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1440 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1441 } 1442 1443 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1444 lrow = row - rstart; 1445 1446 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1447 if (!v) {pvA = 0; pvB = 0;} 1448 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1449 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1450 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1451 nztot = nzA + nzB; 1452 1453 cmap = mat->garray; 1454 if (v || idx) { 1455 if (nztot) { 1456 /* Sort by increasing column numbers, assuming A and B already sorted */ 1457 PetscInt imark = -1; 1458 if (v) { 1459 *v = v_p = mat->rowvalues; 1460 for (i=0; i<nzB; i++) { 1461 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1462 else break; 1463 } 1464 imark = i; 1465 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1466 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1467 } 1468 if (idx) { 1469 *idx = idx_p = mat->rowindices; 1470 if (imark > -1) { 1471 for (i=0; i<imark; i++) { 1472 idx_p[i] = cmap[cworkB[i]]; 1473 } 1474 } else { 1475 for (i=0; i<nzB; i++) { 1476 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1477 else break; 1478 } 1479 imark = i; 1480 } 1481 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1482 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1483 } 1484 } else { 1485 if (idx) *idx = 0; 1486 if (v) *v = 0; 1487 } 1488 } 1489 *nz = nztot; 1490 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1491 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1492 PetscFunctionReturn(0); 1493 } 1494 1495 #undef __FUNCT__ 1496 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1497 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1498 { 1499 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1500 1501 PetscFunctionBegin; 1502 if (!aij->getrowactive) { 1503 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1504 } 1505 aij->getrowactive = PETSC_FALSE; 1506 PetscFunctionReturn(0); 1507 } 1508 1509 #undef __FUNCT__ 1510 #define __FUNCT__ "MatNorm_MPIAIJ" 1511 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1512 { 1513 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1514 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1515 PetscErrorCode ierr; 1516 PetscInt i,j,cstart = mat->cmap->rstart; 1517 PetscReal sum = 0.0; 1518 MatScalar *v; 1519 1520 PetscFunctionBegin; 1521 if (aij->size == 1) { 1522 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1523 } else { 1524 if (type == NORM_FROBENIUS) { 1525 v = amat->a; 1526 for (i=0; i<amat->nz; i++) { 1527 #if defined(PETSC_USE_COMPLEX) 1528 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1529 #else 1530 sum += (*v)*(*v); v++; 1531 #endif 1532 } 1533 v = bmat->a; 1534 for (i=0; i<bmat->nz; i++) { 1535 #if defined(PETSC_USE_COMPLEX) 1536 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1537 #else 1538 sum += (*v)*(*v); v++; 1539 #endif 1540 } 1541 ierr = 
MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1542 *norm = sqrt(*norm); 1543 } else if (type == NORM_1) { /* max column norm */ 1544 PetscReal *tmp,*tmp2; 1545 PetscInt *jj,*garray = aij->garray; 1546 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1547 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1548 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1549 *norm = 0.0; 1550 v = amat->a; jj = amat->j; 1551 for (j=0; j<amat->nz; j++) { 1552 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1553 } 1554 v = bmat->a; jj = bmat->j; 1555 for (j=0; j<bmat->nz; j++) { 1556 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1557 } 1558 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1559 for (j=0; j<mat->cmap->N; j++) { 1560 if (tmp2[j] > *norm) *norm = tmp2[j]; 1561 } 1562 ierr = PetscFree(tmp);CHKERRQ(ierr); 1563 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1564 } else if (type == NORM_INFINITY) { /* max row norm */ 1565 PetscReal ntemp = 0.0; 1566 for (j=0; j<aij->A->rmap->n; j++) { 1567 v = amat->a + amat->i[j]; 1568 sum = 0.0; 1569 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1570 sum += PetscAbsScalar(*v); v++; 1571 } 1572 v = bmat->a + bmat->i[j]; 1573 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1574 sum += PetscAbsScalar(*v); v++; 1575 } 1576 if (sum > ntemp) ntemp = sum; 1577 } 1578 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1579 } else { 1580 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1581 } 1582 } 1583 PetscFunctionReturn(0); 1584 } 1585 1586 #undef __FUNCT__ 1587 #define __FUNCT__ "MatTranspose_MPIAIJ" 1588 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1589 { 1590 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1591 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1592 PetscErrorCode ierr; 1593 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1594 PetscInt cstart=A->cmap->rstart,ncol; 1595 Mat B; 1596 MatScalar *array; 1597 1598 PetscFunctionBegin; 1599 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1600 1601 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1602 ai = Aloc->i; aj = Aloc->j; 1603 bi = Bloc->i; bj = Bloc->j; 1604 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1605 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1606 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1607 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1608 for (i=0; i<ai[ma]; i++){ 1609 d_nnz[aj[i]] ++; 1610 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1611 } 1612 1613 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1614 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1615 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1616 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1617 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1618 } else { 1619 B = *matout; 1620 } 1621 1622 /* copy over the A part */ 1623 array = Aloc->a; 1624 row = A->rmap->rstart; 1625 for (i=0; i<ma; i++) { 1626 ncol = ai[i+1]-ai[i]; 1627 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1628 row++; array += ncol; aj += ncol; 1629 } 1630 aj = Aloc->j; 1631 for (i=0; i<ai[ma]; i++) aj[i] -= 
cstart; /* resume local col index */ 1632 1633 /* copy over the B part */ 1634 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1635 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1636 array = Bloc->a; 1637 row = A->rmap->rstart; 1638 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1639 cols_tmp = cols; 1640 for (i=0; i<mb; i++) { 1641 ncol = bi[i+1]-bi[i]; 1642 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1643 row++; array += ncol; cols_tmp += ncol; 1644 } 1645 ierr = PetscFree(cols);CHKERRQ(ierr); 1646 1647 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1648 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1649 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1650 *matout = B; 1651 } else { 1652 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1653 } 1654 PetscFunctionReturn(0); 1655 } 1656 1657 #undef __FUNCT__ 1658 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1659 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1660 { 1661 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1662 Mat a = aij->A,b = aij->B; 1663 PetscErrorCode ierr; 1664 PetscInt s1,s2,s3; 1665 1666 PetscFunctionBegin; 1667 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1668 if (rr) { 1669 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1670 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1671 /* Overlap communication with computation. */ 1672 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1673 } 1674 if (ll) { 1675 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1676 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1677 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1678 } 1679 /* scale the diagonal block */ 1680 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1681 1682 if (rr) { 1683 /* Do a scatter end and then right scale the off-diagonal block */ 1684 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1685 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1686 } 1687 1688 PetscFunctionReturn(0); 1689 } 1690 1691 #undef __FUNCT__ 1692 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1693 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1694 { 1695 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1696 PetscErrorCode ierr; 1697 1698 PetscFunctionBegin; 1699 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1700 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1701 PetscFunctionReturn(0); 1702 } 1703 #undef __FUNCT__ 1704 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1705 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1706 { 1707 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1708 PetscErrorCode ierr; 1709 1710 PetscFunctionBegin; 1711 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1712 PetscFunctionReturn(0); 1713 } 1714 1715 #undef __FUNCT__ 1716 #define __FUNCT__ "MatEqual_MPIAIJ" 1717 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1718 { 1719 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1720 Mat a,b,c,d; 1721 PetscTruth flg; 1722 PetscErrorCode ierr; 1723 1724 PetscFunctionBegin; 1725 a = matA->A; b = matA->B; 1726 c = matB->A; d = matB->B; 1727 1728 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1729 if (flg) { 1730 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1731 } 1732 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1733 PetscFunctionReturn(0); 1734 } 1735 1736 #undef __FUNCT__ 1737 #define __FUNCT__ 
"MatCopy_MPIAIJ" 1738 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1739 { 1740 PetscErrorCode ierr; 1741 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1742 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1743 1744 PetscFunctionBegin; 1745 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1746 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1747 /* because of the column compression in the off-processor part of the matrix a->B, 1748 the number of columns in a->B and b->B may be different, hence we cannot call 1749 the MatCopy() directly on the two parts. If need be, we can provide a more 1750 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1751 then copying the submatrices */ 1752 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1753 } else { 1754 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1755 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1756 } 1757 PetscFunctionReturn(0); 1758 } 1759 1760 #undef __FUNCT__ 1761 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1762 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1763 { 1764 PetscErrorCode ierr; 1765 1766 PetscFunctionBegin; 1767 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1768 PetscFunctionReturn(0); 1769 } 1770 1771 #include "petscblaslapack.h" 1772 #undef __FUNCT__ 1773 #define __FUNCT__ "MatAXPY_MPIAIJ" 1774 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1775 { 1776 PetscErrorCode ierr; 1777 PetscInt i; 1778 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1779 PetscBLASInt bnz,one=1; 1780 Mat_SeqAIJ *x,*y; 1781 1782 PetscFunctionBegin; 1783 if (str == SAME_NONZERO_PATTERN) { 1784 PetscScalar alpha = a; 1785 x = (Mat_SeqAIJ *)xx->A->data; 1786 y = (Mat_SeqAIJ *)yy->A->data; 1787 bnz = PetscBLASIntCast(x->nz); 1788 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1789 x = (Mat_SeqAIJ *)xx->B->data; 1790 y = (Mat_SeqAIJ *)yy->B->data; 1791 bnz = PetscBLASIntCast(x->nz); 1792 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1793 } else if (str == SUBSET_NONZERO_PATTERN) { 1794 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1795 1796 x = (Mat_SeqAIJ *)xx->B->data; 1797 y = (Mat_SeqAIJ *)yy->B->data; 1798 if (y->xtoy && y->XtoY != xx->B) { 1799 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1800 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1801 } 1802 if (!y->xtoy) { /* get xtoy */ 1803 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1804 y->XtoY = xx->B; 1805 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1806 } 1807 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1808 } else { 1809 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1810 } 1811 PetscFunctionReturn(0); 1812 } 1813 1814 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1815 1816 #undef __FUNCT__ 1817 #define __FUNCT__ "MatConjugate_MPIAIJ" 1818 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1819 { 1820 #if defined(PETSC_USE_COMPLEX) 1821 PetscErrorCode ierr; 1822 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1823 1824 PetscFunctionBegin; 1825 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1826 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1827 #else 1828 PetscFunctionBegin; 1829 #endif 1830 PetscFunctionReturn(0); 1831 } 1832 1833 #undef __FUNCT__ 1834 #define __FUNCT__ "MatRealPart_MPIAIJ" 1835 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1836 { 1837 Mat_MPIAIJ *a = 
(Mat_MPIAIJ*)A->data; 1838 PetscErrorCode ierr; 1839 1840 PetscFunctionBegin; 1841 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1842 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1843 PetscFunctionReturn(0); 1844 } 1845 1846 #undef __FUNCT__ 1847 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1848 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1849 { 1850 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1851 PetscErrorCode ierr; 1852 1853 PetscFunctionBegin; 1854 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1855 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1856 PetscFunctionReturn(0); 1857 } 1858 1859 #ifdef PETSC_HAVE_PBGL 1860 1861 #include <boost/parallel/mpi/bsp_process_group.hpp> 1862 #include <boost/graph/distributed/ilu_default_graph.hpp> 1863 #include <boost/graph/distributed/ilu_0_block.hpp> 1864 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1865 #include <boost/graph/distributed/petsc/interface.hpp> 1866 #include <boost/multi_array.hpp> 1867 #include <boost/parallel/distributed_property_map->hpp> 1868 1869 #undef __FUNCT__ 1870 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1871 /* 1872 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1873 */ 1874 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1875 { 1876 namespace petsc = boost::distributed::petsc; 1877 1878 namespace graph_dist = boost::graph::distributed; 1879 using boost::graph::distributed::ilu_default::process_group_type; 1880 using boost::graph::ilu_permuted; 1881 1882 PetscTruth row_identity, col_identity; 1883 PetscContainer c; 1884 PetscInt m, n, M, N; 1885 PetscErrorCode ierr; 1886 1887 PetscFunctionBegin; 1888 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1889 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1890 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1891 if (!row_identity || !col_identity) { 1892 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1893 } 1894 1895 process_group_type pg; 1896 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1897 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1898 lgraph_type& level_graph = *lgraph_p; 1899 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1900 1901 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1902 ilu_permuted(level_graph); 1903 1904 /* put together the new matrix */ 1905 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 1906 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1907 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1908 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 1909 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 1910 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1911 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1912 1913 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 1914 ierr = PetscContainerSetPointer(c, lgraph_p); 1915 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 1916 PetscFunctionReturn(0); 1917 } 1918 1919 #undef __FUNCT__ 1920 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1921 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 1922 { 1923 PetscFunctionBegin; 1924 PetscFunctionReturn(0); 1925 } 1926 1927 #undef __FUNCT__ 1928 #define __FUNCT__ "MatSolve_MPIAIJ" 1929 /* 1930 This uses the parallel ILU 
factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1931 */ 1932 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1933 { 1934 namespace graph_dist = boost::graph::distributed; 1935 1936 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1937 lgraph_type* lgraph_p; 1938 PetscContainer c; 1939 PetscErrorCode ierr; 1940 1941 PetscFunctionBegin; 1942 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1943 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1944 ierr = VecCopy(b, x);CHKERRQ(ierr); 1945 1946 PetscScalar* array_x; 1947 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1948 PetscInt sx; 1949 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 1950 1951 PetscScalar* array_b; 1952 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 1953 PetscInt sb; 1954 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 1955 1956 lgraph_type& level_graph = *lgraph_p; 1957 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1958 1959 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 1960 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 1961 ref_x(array_x, boost::extents[num_vertices(graph)]); 1962 1963 typedef boost::iterator_property_map<array_ref_type::iterator, 1964 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 1965 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 1966 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 1967 1968 ilu_set_solve(*lgraph_p, vector_b, vector_x); 1969 1970 PetscFunctionReturn(0); 1971 } 1972 #endif 1973 1974 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 1975 PetscInt nzlocal,nsends,nrecvs; 1976 PetscMPIInt *send_rank; 1977 PetscInt *sbuf_nz,*sbuf_j,**rbuf_j; 1978 PetscScalar *sbuf_a,**rbuf_a; 1979 PetscErrorCode (*MatDestroy)(Mat); 1980 } Mat_Redundant; 1981 1982 #undef __FUNCT__ 1983 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 1984 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 1985 { 1986 PetscErrorCode ierr; 1987 Mat_Redundant *redund=(Mat_Redundant*)ptr; 1988 PetscInt i; 1989 1990 PetscFunctionBegin; 1991 ierr = PetscFree(redund->send_rank);CHKERRQ(ierr); 1992 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 1993 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 1994 for (i=0; i<redund->nrecvs; i++){ 1995 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 1996 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 1997 } 1998 ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 1999 ierr = PetscFree(redund);CHKERRQ(ierr); 2000 PetscFunctionReturn(0); 2001 } 2002 2003 #undef __FUNCT__ 2004 #define __FUNCT__ "MatDestroy_MatRedundant" 2005 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2006 { 2007 PetscErrorCode ierr; 2008 PetscContainer container; 2009 Mat_Redundant *redund=PETSC_NULL; 2010 2011 PetscFunctionBegin; 2012 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2013 if (container) { 2014 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2015 } else { 2016 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2017 } 2018 A->ops->destroy = redund->MatDestroy; 2019 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2020 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2021 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 2022 PetscFunctionReturn(0); 2023 } 2024 2025 #undef __FUNCT__ 2026 #define __FUNCT__ 
"MatGetRedundantMatrix_MPIAIJ" 2027 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2028 { 2029 PetscMPIInt rank,size; 2030 MPI_Comm comm=((PetscObject)mat)->comm; 2031 PetscErrorCode ierr; 2032 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2033 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2034 PetscInt *rowrange=mat->rmap->range; 2035 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2036 Mat A=aij->A,B=aij->B,C=*matredundant; 2037 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2038 PetscScalar *sbuf_a; 2039 PetscInt nzlocal=a->nz+b->nz; 2040 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2041 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2042 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2043 MatScalar *aworkA,*aworkB; 2044 PetscScalar *vals; 2045 PetscMPIInt tag1,tag2,tag3,imdex; 2046 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2047 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2048 MPI_Status recv_status,*send_status; 2049 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2050 PetscInt **rbuf_j=PETSC_NULL; 2051 PetscScalar **rbuf_a=PETSC_NULL; 2052 Mat_Redundant *redund=PETSC_NULL; 2053 PetscContainer container; 2054 2055 PetscFunctionBegin; 2056 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2057 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2058 2059 if (reuse == MAT_REUSE_MATRIX) { 2060 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2061 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2062 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2063 if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2064 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2065 if (container) { 2066 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2067 } else { 2068 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2069 } 2070 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2071 2072 nsends = redund->nsends; 2073 nrecvs = redund->nrecvs; 2074 send_rank = redund->send_rank; recv_rank = send_rank + size; 2075 sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends; 2076 sbuf_j = redund->sbuf_j; 2077 sbuf_a = redund->sbuf_a; 2078 rbuf_j = redund->rbuf_j; 2079 rbuf_a = redund->rbuf_a; 2080 } 2081 2082 if (reuse == MAT_INITIAL_MATRIX){ 2083 PetscMPIInt subrank,subsize; 2084 PetscInt nleftover,np_subcomm; 2085 /* get the destination processors' id send_rank, nsends and nrecvs */ 2086 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2087 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2088 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 2089 recv_rank = send_rank + size; 2090 np_subcomm = size/nsubcomm; 2091 nleftover = size - nsubcomm*np_subcomm; 2092 nsends = 0; nrecvs = 0; 2093 for (i=0; i<size; i++){ /* i=rank*/ 2094 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2095 send_rank[nsends] = i; nsends++; 2096 recv_rank[nrecvs++] = i; 2097 } 2098 } 2099 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2100 i = size-nleftover-1; 2101 j = 0; 2102 while (j < nsubcomm - nleftover){ 2103 send_rank[nsends++] = i; 2104 i--; j++; 2105 } 2106 } 2107 2108 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2109 for (i=0; i<nleftover; i++){ 2110 recv_rank[nrecvs++] = size-nleftover+i; 2111 } 2112 } 2113 2114 /* allocate sbuf_j, sbuf_a */ 2115 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2116 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2117 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2118 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2119 2120 /* copy mat's local entries into the buffers */ 2121 if (reuse == MAT_INITIAL_MATRIX){ 2122 rownz_max = 0; 2123 rptr = sbuf_j; 2124 cols = sbuf_j + rend-rstart + 1; 2125 vals = sbuf_a; 2126 rptr[0] = 0; 2127 for (i=0; i<rend-rstart; i++){ 2128 row = i + rstart; 2129 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2130 ncols = nzA + nzB; 2131 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2132 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2133 /* load the column indices for this row into cols */ 2134 lwrite = 0; 2135 for (l=0; l<nzB; l++) { 2136 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2137 vals[lwrite] = aworkB[l]; 2138 cols[lwrite++] = ctmp; 2139 } 2140 } 2141 for (l=0; l<nzA; l++){ 2142 vals[lwrite] = aworkA[l]; 2143 cols[lwrite++] = cstart + cworkA[l]; 2144 } 2145 for (l=0; l<nzB; l++) { 2146 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2147 vals[lwrite] = aworkB[l]; 2148 cols[lwrite++] = ctmp; 2149 } 2150 } 2151 vals += ncols; 2152 cols += ncols; 2153 rptr[i+1] = rptr[i] + ncols; 2154 if (rownz_max < ncols) rownz_max = ncols; 2155 } 2156 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2157 } else { /* only copy matrix values into sbuf_a */ 2158 rptr = sbuf_j; 2159 vals = sbuf_a; 2160 rptr[0] = 0; 2161 for (i=0; i<rend-rstart; i++){ 2162 row = i + rstart; 2163 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2164 ncols = nzA + nzB; 2165 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2166 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2167 lwrite = 0; 2168 for (l=0; l<nzB; l++) { 2169 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2170 } 2171 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2172 for (l=0; l<nzB; l++) { 
2173 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2174 } 2175 vals += ncols; 2176 rptr[i+1] = rptr[i] + ncols; 2177 } 2178 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2179 2180 /* send nzlocal to others, and recv other's nzlocal */ 2181 /*--------------------------------------------------*/ 2182 if (reuse == MAT_INITIAL_MATRIX){ 2183 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2184 s_waits2 = s_waits3 + nsends; 2185 s_waits1 = s_waits2 + nsends; 2186 r_waits1 = s_waits1 + nsends; 2187 r_waits2 = r_waits1 + nrecvs; 2188 r_waits3 = r_waits2 + nrecvs; 2189 } else { 2190 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2191 r_waits3 = s_waits3 + nsends; 2192 } 2193 2194 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2195 if (reuse == MAT_INITIAL_MATRIX){ 2196 /* get new tags to keep the communication clean */ 2197 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2198 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2199 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2200 rbuf_nz = sbuf_nz + nsends; 2201 2202 /* post receives of other's nzlocal */ 2203 for (i=0; i<nrecvs; i++){ 2204 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2205 } 2206 /* send nzlocal to others */ 2207 for (i=0; i<nsends; i++){ 2208 sbuf_nz[i] = nzlocal; 2209 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2210 } 2211 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2212 count = nrecvs; 2213 while (count) { 2214 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2215 recv_rank[imdex] = recv_status.MPI_SOURCE; 2216 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2217 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2218 2219 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2220 rbuf_nz[imdex] += i + 2; 2221 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2222 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2223 count--; 2224 } 2225 /* wait on sends of nzlocal */ 2226 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2227 /* send mat->i,j to others, and recv from other's */ 2228 /*------------------------------------------------*/ 2229 for (i=0; i<nsends; i++){ 2230 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2231 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2232 } 2233 /* wait on receives of mat->i,j */ 2234 /*------------------------------*/ 2235 count = nrecvs; 2236 while (count) { 2237 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2238 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2239 count--; 2240 } 2241 /* wait on sends of mat->i,j */ 2242 /*---------------------------*/ 2243 if (nsends) { 2244 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2245 } 2246 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2247 2248 /* post receives, send and receive mat->a */ 2249 /*----------------------------------------*/ 
2250 for (imdex=0; imdex<nrecvs; imdex++) { 2251 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2252 } 2253 for (i=0; i<nsends; i++){ 2254 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2255 } 2256 count = nrecvs; 2257 while (count) { 2258 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2259 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2260 count--; 2261 } 2262 if (nsends) { 2263 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2264 } 2265 2266 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2267 2268 /* create redundant matrix */ 2269 /*-------------------------*/ 2270 if (reuse == MAT_INITIAL_MATRIX){ 2271 /* compute rownz_max for preallocation */ 2272 for (imdex=0; imdex<nrecvs; imdex++){ 2273 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2274 rptr = rbuf_j[imdex]; 2275 for (i=0; i<j; i++){ 2276 ncols = rptr[i+1] - rptr[i]; 2277 if (rownz_max < ncols) rownz_max = ncols; 2278 } 2279 } 2280 2281 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2282 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2283 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2284 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2285 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2286 } else { 2287 C = *matredundant; 2288 } 2289 2290 /* insert local matrix entries */ 2291 rptr = sbuf_j; 2292 cols = sbuf_j + rend-rstart + 1; 2293 vals = sbuf_a; 2294 for (i=0; i<rend-rstart; i++){ 2295 row = i + rstart; 2296 ncols = rptr[i+1] - rptr[i]; 2297 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2298 vals += ncols; 2299 cols += ncols; 2300 } 2301 /* insert received matrix entries */ 2302 for (imdex=0; imdex<nrecvs; imdex++){ 2303 rstart = rowrange[recv_rank[imdex]]; 2304 rend = rowrange[recv_rank[imdex]+1]; 2305 rptr = rbuf_j[imdex]; 2306 cols = rbuf_j[imdex] + rend-rstart + 1; 2307 vals = rbuf_a[imdex]; 2308 for (i=0; i<rend-rstart; i++){ 2309 row = i + rstart; 2310 ncols = rptr[i+1] - rptr[i]; 2311 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2312 vals += ncols; 2313 cols += ncols; 2314 } 2315 } 2316 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2317 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2318 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2319 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2320 if (reuse == MAT_INITIAL_MATRIX){ 2321 PetscContainer container; 2322 *matredundant = C; 2323 /* create a supporting struct and attach it to C for reuse */ 2324 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2325 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2326 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2327 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2328 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2329 2330 redund->nzlocal = nzlocal; 2331 redund->nsends = nsends; 2332 redund->nrecvs = nrecvs; 2333 redund->send_rank = send_rank; 2334 redund->sbuf_nz = sbuf_nz; 2335 redund->sbuf_j = sbuf_j; 2336 redund->sbuf_a = sbuf_a; 2337 redund->rbuf_j = 
rbuf_j; 2338 redund->rbuf_a = rbuf_a; 2339 2340 redund->MatDestroy = C->ops->destroy; 2341 C->ops->destroy = MatDestroy_MatRedundant; 2342 } 2343 PetscFunctionReturn(0); 2344 } 2345 2346 #undef __FUNCT__ 2347 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2348 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2349 { 2350 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2351 PetscErrorCode ierr; 2352 PetscInt i,*idxb = 0; 2353 PetscScalar *va,*vb; 2354 Vec vtmp; 2355 2356 PetscFunctionBegin; 2357 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2358 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2359 if (idx) { 2360 for (i=0; i<A->rmap->n; i++) { 2361 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2362 } 2363 } 2364 2365 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2366 if (idx) { 2367 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2368 } 2369 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2370 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2371 2372 for (i=0; i<A->rmap->n; i++){ 2373 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2374 va[i] = vb[i]; 2375 if (idx) idx[i] = a->garray[idxb[i]]; 2376 } 2377 } 2378 2379 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2380 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2381 if (idxb) { 2382 ierr = PetscFree(idxb);CHKERRQ(ierr); 2383 } 2384 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2385 PetscFunctionReturn(0); 2386 } 2387 2388 #undef __FUNCT__ 2389 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2390 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2391 { 2392 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2393 PetscErrorCode ierr; 2394 PetscInt i,*idxb = 0; 2395 PetscScalar *va,*vb; 2396 Vec vtmp; 2397 2398 PetscFunctionBegin; 2399 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2400 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2401 if (idx) { 2402 for (i=0; i<A->cmap->n; i++) { 2403 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2404 } 2405 } 2406 2407 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2408 if (idx) { 2409 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2410 } 2411 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2412 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2413 2414 for (i=0; i<A->rmap->n; i++){ 2415 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2416 va[i] = vb[i]; 2417 if (idx) idx[i] = a->garray[idxb[i]]; 2418 } 2419 } 2420 2421 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2422 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2423 if (idxb) { 2424 ierr = PetscFree(idxb);CHKERRQ(ierr); 2425 } 2426 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2427 PetscFunctionReturn(0); 2428 } 2429 2430 #undef __FUNCT__ 2431 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2432 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2433 { 2434 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2435 PetscInt n = A->rmap->n; 2436 PetscInt cstart = A->cmap->rstart; 2437 PetscInt *cmap = mat->garray; 2438 PetscInt *diagIdx, *offdiagIdx; 2439 Vec diagV, offdiagV; 2440 PetscScalar *a, *diagA, *offdiagA; 2441 PetscInt r; 2442 PetscErrorCode ierr; 2443 2444 PetscFunctionBegin; 2445 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2446 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2447 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2448 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2449 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2450 
ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2451 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2452 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2453 for(r = 0; r < n; ++r) { 2454 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2455 a[r] = diagA[r]; 2456 idx[r] = cstart + diagIdx[r]; 2457 } else { 2458 a[r] = offdiagA[r]; 2459 idx[r] = cmap[offdiagIdx[r]]; 2460 } 2461 } 2462 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2463 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2464 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2465 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2466 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2467 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2468 PetscFunctionReturn(0); 2469 } 2470 2471 #undef __FUNCT__ 2472 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2473 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2474 { 2475 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2476 PetscInt n = A->rmap->n; 2477 PetscInt cstart = A->cmap->rstart; 2478 PetscInt *cmap = mat->garray; 2479 PetscInt *diagIdx, *offdiagIdx; 2480 Vec diagV, offdiagV; 2481 PetscScalar *a, *diagA, *offdiagA; 2482 PetscInt r; 2483 PetscErrorCode ierr; 2484 2485 PetscFunctionBegin; 2486 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2487 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2488 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2489 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2490 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2491 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2492 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2493 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2494 for(r = 0; r < n; ++r) { 2495 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2496 a[r] = diagA[r]; 2497 idx[r] = cstart + diagIdx[r]; 2498 } else { 2499 a[r] = offdiagA[r]; 2500 idx[r] = cmap[offdiagIdx[r]]; 2501 } 2502 } 2503 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2504 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2505 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2506 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2507 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2508 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2509 PetscFunctionReturn(0); 2510 } 2511 2512 #undef __FUNCT__ 2513 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2514 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[]) 2515 { 2516 PetscErrorCode ierr; 2517 2518 PetscFunctionBegin; 2519 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 2520 PetscFunctionReturn(0); 2521 } 2522 2523 /* -------------------------------------------------------------------*/ 2524 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2525 MatGetRow_MPIAIJ, 2526 MatRestoreRow_MPIAIJ, 2527 MatMult_MPIAIJ, 2528 /* 4*/ MatMultAdd_MPIAIJ, 2529 MatMultTranspose_MPIAIJ, 2530 MatMultTransposeAdd_MPIAIJ, 2531 #ifdef PETSC_HAVE_PBGL 2532 MatSolve_MPIAIJ, 2533 #else 2534 0, 2535 #endif 2536 0, 2537 0, 2538 /*10*/ 0, 2539 0, 2540 0, 2541 MatRelax_MPIAIJ, 2542 MatTranspose_MPIAIJ, 2543 /*15*/ MatGetInfo_MPIAIJ, 2544 MatEqual_MPIAIJ, 2545 MatGetDiagonal_MPIAIJ, 2546 MatDiagonalScale_MPIAIJ, 2547 MatNorm_MPIAIJ, 2548 /*20*/ MatAssemblyBegin_MPIAIJ, 2549 MatAssemblyEnd_MPIAIJ, 2550 MatSetOption_MPIAIJ, 2551 MatZeroEntries_MPIAIJ, 2552 /*24*/ MatZeroRows_MPIAIJ, 2553 0, 2554 #ifdef PETSC_HAVE_PBGL 2555 0, 
2556 #else 2557 0, 2558 #endif 2559 0, 2560 0, 2561 /*29*/ MatSetUpPreallocation_MPIAIJ, 2562 #ifdef PETSC_HAVE_PBGL 2563 0, 2564 #else 2565 0, 2566 #endif 2567 0, 2568 0, 2569 0, 2570 /*34*/ MatDuplicate_MPIAIJ, 2571 0, 2572 0, 2573 0, 2574 0, 2575 /*39*/ MatAXPY_MPIAIJ, 2576 MatGetSubMatrices_MPIAIJ, 2577 MatIncreaseOverlap_MPIAIJ, 2578 MatGetValues_MPIAIJ, 2579 MatCopy_MPIAIJ, 2580 /*44*/ MatGetRowMax_MPIAIJ, 2581 MatScale_MPIAIJ, 2582 0, 2583 0, 2584 0, 2585 /*49*/ MatSetBlockSize_MPIAIJ, 2586 0, 2587 0, 2588 0, 2589 0, 2590 /*54*/ MatFDColoringCreate_MPIAIJ, 2591 0, 2592 MatSetUnfactored_MPIAIJ, 2593 MatPermute_MPIAIJ, 2594 0, 2595 /*59*/ MatGetSubMatrix_MPIAIJ, 2596 MatDestroy_MPIAIJ, 2597 MatView_MPIAIJ, 2598 0, 2599 0, 2600 /*64*/ 0, 2601 0, 2602 0, 2603 0, 2604 0, 2605 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2606 MatGetRowMinAbs_MPIAIJ, 2607 0, 2608 MatSetColoring_MPIAIJ, 2609 #if defined(PETSC_HAVE_ADIC) 2610 MatSetValuesAdic_MPIAIJ, 2611 #else 2612 0, 2613 #endif 2614 MatSetValuesAdifor_MPIAIJ, 2615 /*75*/ 0, 2616 0, 2617 0, 2618 0, 2619 0, 2620 /*80*/ 0, 2621 0, 2622 0, 2623 /*83*/ MatLoad_MPIAIJ, 2624 0, 2625 0, 2626 0, 2627 0, 2628 0, 2629 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2630 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2631 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2632 MatPtAP_Basic, 2633 MatPtAPSymbolic_MPIAIJ, 2634 /*94*/ MatPtAPNumeric_MPIAIJ, 2635 0, 2636 0, 2637 0, 2638 0, 2639 /*99*/ 0, 2640 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2641 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2642 MatConjugate_MPIAIJ, 2643 0, 2644 /*104*/MatSetValuesRow_MPIAIJ, 2645 MatRealPart_MPIAIJ, 2646 MatImaginaryPart_MPIAIJ, 2647 0, 2648 0, 2649 /*109*/0, 2650 MatGetRedundantMatrix_MPIAIJ, 2651 MatGetRowMin_MPIAIJ, 2652 0, 2653 0, 2654 /*114*/MatGetSeqNonzerostructure_MPIAIJ}; 2655 2656 /* ----------------------------------------------------------------------------------------*/ 2657 2658 EXTERN_C_BEGIN 2659 #undef __FUNCT__ 2660 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2661 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2662 { 2663 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2664 PetscErrorCode ierr; 2665 2666 PetscFunctionBegin; 2667 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2668 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2669 PetscFunctionReturn(0); 2670 } 2671 EXTERN_C_END 2672 2673 EXTERN_C_BEGIN 2674 #undef __FUNCT__ 2675 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2676 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2677 { 2678 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2679 PetscErrorCode ierr; 2680 2681 PetscFunctionBegin; 2682 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2683 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2684 PetscFunctionReturn(0); 2685 } 2686 EXTERN_C_END 2687 2688 #include "petscpc.h" 2689 EXTERN_C_BEGIN 2690 #undef __FUNCT__ 2691 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2692 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2693 { 2694 Mat_MPIAIJ *b; 2695 PetscErrorCode ierr; 2696 PetscInt i; 2697 2698 PetscFunctionBegin; 2699 B->preallocated = PETSC_TRUE; 2700 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2701 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2702 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2703 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2704 2705 ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr); 2706 ierr = 
PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr); 2707 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 2708 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 2709 if (d_nnz) { 2710 for (i=0; i<B->rmap->n; i++) { 2711 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2712 } 2713 } 2714 if (o_nnz) { 2715 for (i=0; i<B->rmap->n; i++) { 2716 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2717 } 2718 } 2719 b = (Mat_MPIAIJ*)B->data; 2720 2721 /* Explicitly create 2 MATSEQAIJ matrices. */ 2722 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2723 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2724 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2725 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2726 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2727 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2728 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2729 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2730 2731 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2732 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2733 2734 PetscFunctionReturn(0); 2735 } 2736 EXTERN_C_END 2737 2738 #undef __FUNCT__ 2739 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2740 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2741 { 2742 Mat mat; 2743 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2744 PetscErrorCode ierr; 2745 2746 PetscFunctionBegin; 2747 *newmat = 0; 2748 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2749 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2750 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2751 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2752 a = (Mat_MPIAIJ*)mat->data; 2753 2754 mat->factor = matin->factor; 2755 mat->rmap->bs = matin->rmap->bs; 2756 mat->assembled = PETSC_TRUE; 2757 mat->insertmode = NOT_SET_VALUES; 2758 mat->preallocated = PETSC_TRUE; 2759 2760 a->size = oldmat->size; 2761 a->rank = oldmat->rank; 2762 a->donotstash = oldmat->donotstash; 2763 a->roworiented = oldmat->roworiented; 2764 a->rowindices = 0; 2765 a->rowvalues = 0; 2766 a->getrowactive = PETSC_FALSE; 2767 2768 ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr); 2769 ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr); 2770 2771 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr); 2772 if (oldmat->colmap) { 2773 #if defined (PETSC_USE_CTABLE) 2774 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2775 #else 2776 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2777 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2778 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2779 #endif 2780 } else a->colmap = 0; 2781 if (oldmat->garray) { 2782 PetscInt len; 2783 len = oldmat->B->cmap->n; 2784 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2785 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2786 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2787 } else a->garray = 0; 2788 2789 ierr = 
VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2790 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2791 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2792 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2793 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2794 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2795 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2796 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2797 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2798 *newmat = mat; 2799 PetscFunctionReturn(0); 2800 } 2801 2802 #include "petscsys.h" 2803 2804 #undef __FUNCT__ 2805 #define __FUNCT__ "MatLoad_MPIAIJ" 2806 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 2807 { 2808 Mat A; 2809 PetscScalar *vals,*svals; 2810 MPI_Comm comm = ((PetscObject)viewer)->comm; 2811 MPI_Status status; 2812 PetscErrorCode ierr; 2813 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz; 2814 PetscInt i,nz,j,rstart,rend,mmax,maxnz; 2815 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2816 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2817 PetscInt cend,cstart,n,*rowners; 2818 int fd; 2819 2820 PetscFunctionBegin; 2821 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2822 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2823 if (!rank) { 2824 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2825 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2826 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2827 } 2828 2829 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2830 M = header[1]; N = header[2]; 2831 /* determine ownership of all rows */ 2832 m = M/size + ((M % size) > rank); 2833 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2834 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2835 2836 /* First process needs enough room for process with most rows */ 2837 if (!rank) { 2838 mmax = rowners[1]; 2839 for (i=2; i<size; i++) { 2840 mmax = PetscMax(mmax,rowners[i]); 2841 } 2842 } else mmax = m; 2843 2844 rowners[0] = 0; 2845 for (i=2; i<=size; i++) { 2846 rowners[i] += rowners[i-1]; 2847 } 2848 rstart = rowners[rank]; 2849 rend = rowners[rank+1]; 2850 2851 /* distribute row lengths to all processors */ 2852 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2853 if (!rank) { 2854 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2855 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2856 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2857 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2858 for (j=0; j<m; j++) { 2859 procsnz[0] += ourlens[j]; 2860 } 2861 for (i=1; i<size; i++) { 2862 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2863 /* calculate the number of nonzeros on each processor */ 2864 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2865 procsnz[i] += rowlengths[j]; 2866 } 2867 mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]); 2868 ierr = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2869 } 2870 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2871 } else { 2872 mpicnt = PetscMPIIntCast(m);CHKERRQ(ierr); 2873 ierr = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2874 } 2875 2876 if 
(!rank) { 2877 /* determine max buffer needed and allocate it */ 2878 maxnz = 0; 2879 for (i=0; i<size; i++) { 2880 maxnz = PetscMax(maxnz,procsnz[i]); 2881 } 2882 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2883 2884 /* read in my part of the matrix column indices */ 2885 nz = procsnz[0]; 2886 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2887 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2888 2889 /* read in every one elses and ship off */ 2890 for (i=1; i<size; i++) { 2891 nz = procsnz[i]; 2892 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2893 mpicnt = PetscMPIIntCast(nz); 2894 ierr = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2895 } 2896 ierr = PetscFree(cols);CHKERRQ(ierr); 2897 } else { 2898 /* determine buffer space needed for message */ 2899 nz = 0; 2900 for (i=0; i<m; i++) { 2901 nz += ourlens[i]; 2902 } 2903 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2904 2905 /* receive message of column indices*/ 2906 mpicnt = PetscMPIIntCast(nz);CHKERRQ(ierr); 2907 ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2908 ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr); 2909 if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 2910 else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);} 2911 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 2912 } 2913 2914 /* determine column ownership if matrix is not square */ 2915 if (N != M) { 2916 n = N/size + ((N % size) > rank); 2917 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2918 cstart = cend - n; 2919 } else { 2920 cstart = rstart; 2921 cend = rend; 2922 n = cend - cstart; 2923 } 2924 2925 /* loop over local rows, determining number of off diagonal entries */ 2926 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2927 jj = 0; 2928 for (i=0; i<m; i++) { 2929 for (j=0; j<ourlens[i]; j++) { 2930 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2931 jj++; 2932 } 2933 } 2934 2935 /* create our matrix */ 2936 for (i=0; i<m; i++) { 2937 ourlens[i] -= offlens[i]; 2938 } 2939 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2940 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2941 ierr = MatSetType(A,type);CHKERRQ(ierr); 2942 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2943 2944 for (i=0; i<m; i++) { 2945 ourlens[i] += offlens[i]; 2946 } 2947 2948 if (!rank) { 2949 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2950 2951 /* read in my part of the matrix numerical values */ 2952 nz = procsnz[0]; 2953 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2954 2955 /* insert into matrix */ 2956 jj = rstart; 2957 smycols = mycols; 2958 svals = vals; 2959 for (i=0; i<m; i++) { 2960 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2961 smycols += ourlens[i]; 2962 svals += ourlens[i]; 2963 jj++; 2964 } 2965 2966 /* read in other processors and ship out */ 2967 for (i=1; i<size; i++) { 2968 nz = procsnz[i]; 2969 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2970 mpicnt = PetscMPIIntCast(nz); 2971 ierr = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 2972 } 2973 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2974 } else { 
2975 /* receive numeric values */ 2976 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2977 2978 /* receive message of values*/ 2979 mpicnt = PetscMPIIntCast(nz); 2980 ierr = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 2981 ierr = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr); 2982 if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 2983 else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);} 2984 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 2985 2986 /* insert into matrix */ 2987 jj = rstart; 2988 smycols = mycols; 2989 svals = vals; 2990 for (i=0; i<m; i++) { 2991 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2992 smycols += ourlens[i]; 2993 svals += ourlens[i]; 2994 jj++; 2995 } 2996 } 2997 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2998 ierr = PetscFree(vals);CHKERRQ(ierr); 2999 ierr = PetscFree(mycols);CHKERRQ(ierr); 3000 ierr = PetscFree(rowners);CHKERRQ(ierr); 3001 3002 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3003 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3004 *newmat = A; 3005 PetscFunctionReturn(0); 3006 } 3007 3008 #undef __FUNCT__ 3009 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3010 /* 3011 Not great since it makes two copies of the submatrix, first an SeqAIJ 3012 in local and then by concatenating the local matrices the end result. 3013 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3014 */ 3015 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3016 { 3017 PetscErrorCode ierr; 3018 PetscMPIInt rank,size; 3019 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3020 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3021 Mat *local,M,Mreuse; 3022 MatScalar *vwork,*aa; 3023 MPI_Comm comm = ((PetscObject)mat)->comm; 3024 Mat_SeqAIJ *aij; 3025 3026 3027 PetscFunctionBegin; 3028 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3029 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3030 3031 if (call == MAT_REUSE_MATRIX) { 3032 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3033 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3034 local = &Mreuse; 3035 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3036 } else { 3037 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3038 Mreuse = *local; 3039 ierr = PetscFree(local);CHKERRQ(ierr); 3040 } 3041 3042 /* 3043 m - number of local rows 3044 n - number of columns (same on all processors) 3045 rstart - first row in new global matrix generated 3046 */ 3047 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3048 if (call == MAT_INITIAL_MATRIX) { 3049 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3050 ii = aij->i; 3051 jj = aij->j; 3052 3053 /* 3054 Determine the number of non-zeros in the diagonal and off-diagonal 3055 portions of the matrix in order to do correct preallocation 3056 */ 3057 3058 /* first get start and end of "diagonal" columns */ 3059 if (csize == PETSC_DECIDE) { 3060 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3061 if (mglobal == n) { /* square matrix */ 3062 nlocal = m; 3063 } else { 3064 
nlocal = n/size + ((n % size) > rank); 3065 } 3066 } else { 3067 nlocal = csize; 3068 } 3069 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3070 rstart = rend - nlocal; 3071 if (rank == size - 1 && rend != n) { 3072 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3073 } 3074 3075 /* next, compute all the lengths */ 3076 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3077 olens = dlens + m; 3078 for (i=0; i<m; i++) { 3079 jend = ii[i+1] - ii[i]; 3080 olen = 0; 3081 dlen = 0; 3082 for (j=0; j<jend; j++) { 3083 if (*jj < rstart || *jj >= rend) olen++; 3084 else dlen++; 3085 jj++; 3086 } 3087 olens[i] = olen; 3088 dlens[i] = dlen; 3089 } 3090 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3091 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3092 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3093 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3094 ierr = PetscFree(dlens);CHKERRQ(ierr); 3095 } else { 3096 PetscInt ml,nl; 3097 3098 M = *newmat; 3099 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3100 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3101 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3102 /* 3103 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3104 rather than the slower MatSetValues(). 3105 */ 3106 M->was_assembled = PETSC_TRUE; 3107 M->assembled = PETSC_FALSE; 3108 } 3109 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3110 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3111 ii = aij->i; 3112 jj = aij->j; 3113 aa = aij->a; 3114 for (i=0; i<m; i++) { 3115 row = rstart + i; 3116 nz = ii[i+1] - ii[i]; 3117 cwork = jj; jj += nz; 3118 vwork = aa; aa += nz; 3119 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3120 } 3121 3122 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3123 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3124 *newmat = M; 3125 3126 /* save submatrix used in processor for next request */ 3127 if (call == MAT_INITIAL_MATRIX) { 3128 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3129 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 3130 } 3131 3132 PetscFunctionReturn(0); 3133 } 3134 3135 EXTERN_C_BEGIN 3136 #undef __FUNCT__ 3137 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3138 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3139 { 3140 PetscInt m,cstart, cend,j,nnz,i,d; 3141 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3142 const PetscInt *JJ; 3143 PetscScalar *values; 3144 PetscErrorCode ierr; 3145 3146 PetscFunctionBegin; 3147 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3148 3149 ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3150 ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3151 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 3152 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 3153 m = B->rmap->n; 3154 cstart = B->cmap->rstart; 3155 cend = B->cmap->rend; 3156 rstart = B->rmap->rstart; 3157 3158 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 3159 o_nnz = d_nnz + m; 3160 3161 #if defined(PETSC_USE_DEBUGGING) 3162 for (i=0; i<m; i++) { 3163 nnz = Ii[i+1]- Ii[i]; 3164 JJ = J + Ii[i]; 3165 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D 
number of columns",i,nnz); 3166 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3167 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3168 for (j=1; j<nnz; j++) { 3169 if (JJ[j] <= JJ[j-1]) SETERRQ2(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at %D location in column indices",i,j); 3170 } 3171 } 3172 #endif 3173 3174 for (i=0; i<m; i++) { 3175 nnz = Ii[i+1]- Ii[i]; 3176 JJ = J + Ii[i]; 3177 nnz_max = PetscMax(nnz_max,nnz); 3178 for (j=0; j<nnz; j++) { 3179 if (*JJ >= cstart) break; 3180 JJ++; 3181 } 3182 d = 0; 3183 for (; j<nnz; j++) { 3184 if (*JJ++ >= cend) break; 3185 d++; 3186 } 3187 d_nnz[i] = d; 3188 o_nnz[i] = nnz - d; 3189 } 3190 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3191 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 3192 3193 if (v) values = (PetscScalar*)v; 3194 else { 3195 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3196 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3197 } 3198 3199 for (i=0; i<m; i++) { 3200 ii = i + rstart; 3201 nnz = Ii[i+1]- Ii[i]; 3202 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3203 } 3204 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3205 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3206 3207 if (!v) { 3208 ierr = PetscFree(values);CHKERRQ(ierr); 3209 } 3210 PetscFunctionReturn(0); 3211 } 3212 EXTERN_C_END 3213 3214 #undef __FUNCT__ 3215 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3216 /*@ 3217 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3218 (the default parallel PETSc format). 3219 3220 Collective on MPI_Comm 3221 3222 Input Parameters: 3223 + B - the matrix 3224 . i - the indices into j for the start of each local row (starts with zero) 3225 . j - the column indices for each local row (starts with zero); these must be sorted for each row 3226 - v - optional values in the matrix 3227 3228 Level: developer 3229 3230 Notes: 3231 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3232 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3233 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3234 3235 The i and j indices are 0 based, and the i indices are indices into the local j array. 3236 3237 The format used for the sparse matrix input is equivalent to a 3238 row-major ordering, i.e. for the following matrix, the input data expected is 3239 as shown: 3240 3241 1 0 0 3242 2 0 3 P0 3243 ------- 3244 4 5 6 P1 3245 3246 Process0 [P0]: rows_owned=[0,1] 3247 i = {0,1,3} [size = nrow+1 = 2+1] 3248 j = {0,0,2} [size = nz = 3] 3249 v = {1,2,3} [size = nz = 3] 3250 3251 Process1 [P1]: rows_owned=[2] 3252 i = {0,3} [size = nrow+1 = 1+1] 3253 j = {0,1,2} [size = nz = 3] 3254 v = {4,5,6} [size = nz = 3] 3255 3256 The column indices for each row MUST be sorted.
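   As a sketch of typical use (assuming the caller already has the communicator comm, its local
   row/column count m, and its local CSR arrays i, j, v; this mirrors the call sequence used by
   MatCreateMPIAIJWithArrays() below):
.vb
   Mat B;
   ierr = MatCreate(comm,&B);CHKERRQ(ierr);
   ierr = MatSetSizes(B,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
   ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve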
3257 3258 .keywords: matrix, aij, compressed row, sparse, parallel 3259 3260 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3261 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3262 @*/ 3263 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3264 { 3265 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 3266 3267 PetscFunctionBegin; 3268 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3269 if (f) { 3270 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 3271 } 3272 PetscFunctionReturn(0); 3273 } 3274 3275 #undef __FUNCT__ 3276 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3277 /*@C 3278 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3279 (the default parallel PETSc format). For good matrix assembly performance 3280 the user should preallocate the matrix storage by setting the parameters 3281 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3282 performance can be increased by more than a factor of 50. 3283 3284 Collective on MPI_Comm 3285 3286 Input Parameters: 3287 + A - the matrix 3288 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3289 (same value is used for all local rows) 3290 . d_nnz - array containing the number of nonzeros in the various rows of the 3291 DIAGONAL portion of the local submatrix (possibly different for each row) 3292 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3293 The size of this array is equal to the number of local rows, i.e 'm'. 3294 You must leave room for the diagonal entry even if it is zero. 3295 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3296 submatrix (same value is used for all local rows). 3297 - o_nnz - array containing the number of nonzeros in the various rows of the 3298 OFF-DIAGONAL portion of the local submatrix (possibly different for 3299 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3300 structure. The size of this array is equal to the number 3301 of local rows, i.e 'm'. 3302 3303 If the *_nnz parameter is given then the *_nz parameter is ignored. 3304 3305 The AIJ format (also called the Yale sparse matrix format or 3306 compressed row storage (CSR)) is fully compatible with standard Fortran 77 3307 storage. The stored row and column indices begin with zero. See the users manual for details. 3308 3309 The parallel matrix is partitioned such that the first m0 rows belong to 3310 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3311 to process 2, etc., where m0,m1,m2... are the input parameter 'm'. 3312 3313 The DIAGONAL portion of the local submatrix of a processor can be defined 3314 as the submatrix obtained by extracting the part corresponding 3315 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 3316 first row that belongs to the processor and r2 is the last row belonging 3317 to this processor. This is a square mxm matrix. The remaining portion 3318 of the local submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion. 3319 3320 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3321 3322 You can call MatGetInfo() to get information on how effective the preallocation was; 3323 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded. 3324 You can also run with the option -info and look for messages with the string 3325 malloc in them to see if additional memory allocation was needed. 3326 3327 Example usage: 3328 3329 Consider the following 8x8 matrix with 34 non-zero values, that is 3330 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3331 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3332 as follows: 3333 3334 .vb 3335 1 2 0 | 0 3 0 | 0 4 3336 Proc0 0 5 6 | 7 0 0 | 8 0 3337 9 0 10 | 11 0 0 | 12 0 3338 ------------------------------------- 3339 13 0 14 | 15 16 17 | 0 0 3340 Proc1 0 18 0 | 19 20 21 | 0 0 3341 0 0 0 | 22 23 0 | 24 0 3342 ------------------------------------- 3343 Proc2 25 26 27 | 0 0 28 | 29 0 3344 30 0 0 | 31 32 33 | 0 34 3345 .ve 3346 3347 This can be represented as a collection of submatrices as: 3348 3349 .vb 3350 A B C 3351 D E F 3352 G H I 3353 .ve 3354 3355 Where the submatrices A,B,C are owned by proc0, D,E,F are 3356 owned by proc1, G,H,I are owned by proc2. 3357 3358 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3359 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3360 The 'M','N' parameters are 8,8, and have the same values on all procs. 3361 3362 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3363 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3364 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3365 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 3366 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 3367 matrix and [DF] as another SeqAIJ matrix. 3368 3369 When the d_nz, o_nz parameters are specified, d_nz storage elements are 3370 allocated for every row of the local diagonal submatrix, and o_nz 3371 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3372 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 3373 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 3374 In this case, the values of d_nz,o_nz are: 3375 .vb 3376 proc0 : d_nz = 2, o_nz = 2 3377 proc1 : d_nz = 3, o_nz = 2 3378 proc2 : d_nz = 1, o_nz = 4 3379 .ve 3380 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3381 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3382 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3383 34 values. 3384 3385 When the d_nnz, o_nnz parameters are specified, the storage is specified 3386 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 3387 In the above case the values for d_nnz,o_nnz are: 3388 .vb 3389 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3390 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3391 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3392 .ve 3393 Here the space allocated is the sum of all the above values, i.e. 34, and 3394 hence pre-allocation is perfect.
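   As a sketch, the proc0 values from the example above could be supplied as follows (this
   assumes B has already been created with MatCreate()/MatSetSizes()/MatSetType(B,MATMPIAIJ);
   the array names are illustrative):
.vb
   PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};  /* per-row counts for the 3 rows owned by proc0 */
   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve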
3395 3396 Level: intermediate 3397 3398 .keywords: matrix, aij, compressed row, sparse, parallel 3399 3400 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3401 MPIAIJ, MatGetInfo() 3402 @*/ 3403 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3404 { 3405 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3406 3407 PetscFunctionBegin; 3408 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3409 if (f) { 3410 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3411 } 3412 PetscFunctionReturn(0); 3413 } 3414 3415 #undef __FUNCT__ 3416 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3417 /*@ 3418 MatCreateMPIAIJWithArrays - Creates an MPI AIJ matrix using arrays that contain the local 3419 rows in standard CSR format. 3420 3421 Collective on MPI_Comm 3422 3423 Input Parameters: 3424 + comm - MPI communicator 3425 . m - number of local rows (Cannot be PETSC_DECIDE) 3426 . n - This value should be the same as the local size used in creating the 3427 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3428 calculated if N is given) For square matrices n is almost always m. 3429 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3430 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3431 . i - row indices 3432 . j - column indices 3433 - a - matrix values 3434 3435 Output Parameter: 3436 . mat - the matrix 3437 3438 Level: intermediate 3439 3440 Notes: 3441 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3442 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3443 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3444 3445 The i and j indices are 0 based, and the i indices are indices into the local j array. 3446 3447 The format used for the sparse matrix input is equivalent to a 3448 row-major ordering, i.e. for the following matrix, the input data expected is 3449 as shown: 3450 3451 1 0 0 3452 2 0 3 P0 3453 ------- 3454 4 5 6 P1 3455 3456 Process0 [P0]: rows_owned=[0,1] 3457 i = {0,1,3} [size = nrow+1 = 2+1] 3458 j = {0,0,2} [size = nz = 3] 3459 v = {1,2,3} [size = nz = 3] 3460 3461 Process1 [P1]: rows_owned=[2] 3462 i = {0,3} [size = nrow+1 = 1+1] 3463 j = {0,1,2} [size = nz = 3] 3464 v = {4,5,6} [size = nz = 3] 3465 3466 .keywords: matrix, aij, compressed row, sparse, parallel 3467 3468 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3469 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3470 @*/ 3471 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3472 { 3473 PetscErrorCode ierr; 3474 3475 PetscFunctionBegin; 3476 if (i[0]) { 3477 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3478 } 3479 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3480 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3481 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3482 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3483 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3484 PetscFunctionReturn(0); 3485 } 3486 3487 #undef __FUNCT__ 3488 #define __FUNCT__ "MatCreateMPIAIJ" 3489 /*@C 3490 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3491 (the default parallel PETSc format). For good matrix assembly performance 3492 the user should preallocate the matrix storage by setting the parameters 3493 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3494 performance can be increased by more than a factor of 50. 3495 3496 Collective on MPI_Comm 3497 3498 Input Parameters: 3499 + comm - MPI communicator 3500 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3501 This value should be the same as the local size used in creating the 3502 y vector for the matrix-vector product y = Ax. 3503 . n - This value should be the same as the local size used in creating the 3504 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3505 calculated if N is given) For square matrices n is almost always m. 3506 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3507 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3508 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3509 (same value is used for all local rows) 3510 . d_nnz - array containing the number of nonzeros in the various rows of the 3511 DIAGONAL portion of the local submatrix (possibly different for each row) 3512 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3513 The size of this array is equal to the number of local rows, i.e 'm'. 3514 You must leave room for the diagonal entry even if it is zero. 3515 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3516 submatrix (same value is used for all local rows). 3517 - o_nnz - array containing the number of nonzeros in the various rows of the 3518 OFF-DIAGONAL portion of the local submatrix (possibly different for 3519 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3520 structure. The size of this array is equal to the number 3521 of local rows, i.e 'm'.
3522 3523 Output Parameter: 3524 . A - the matrix 3525 3526 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3527 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3528 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3529 3530 Notes: 3531 If the *_nnz parameter is given then the *_nz parameter is ignored. 3532 3533 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3534 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3535 storage requirements for this matrix. 3536 3537 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3538 processor then it must be used on all processors that share the object for 3539 that argument. 3540 3541 The user MUST specify either the local or global matrix dimensions 3542 (possibly both). 3543 3544 The parallel matrix is partitioned across processors such that the 3545 first m0 rows belong to process 0, the next m1 rows belong to 3546 process 1, the next m2 rows belong to process 2, etc., where 3547 m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores 3548 values corresponding to an [m x N] submatrix. 3549 3550 The columns are logically partitioned with the n0 columns belonging 3551 to the 0th partition, the next n1 columns belonging to the next 3552 partition, etc., where n0,n1,n2... are the input parameter 'n'. 3553 3554 The DIAGONAL portion of the local submatrix on any given processor 3555 is the submatrix corresponding to the rows and columns m,n 3556 corresponding to the given processor, i.e. the diagonal matrix on 3557 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1], 3558 etc. The remaining portion of the local submatrix [m x (N-n)] 3559 constitutes the OFF-DIAGONAL portion. The example below better 3560 illustrates this concept. 3561 3562 For a square global matrix we define each processor's diagonal portion 3563 to be its local rows and the corresponding columns (a square submatrix); 3564 each processor's off-diagonal portion encompasses the remainder of the 3565 local matrix (a rectangular submatrix). 3566 3567 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3568 3569 When calling this routine with a single process communicator, a matrix of 3570 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3571 type of communicator, use the construction mechanism: 3572 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3573 3574 By default, this format uses inodes (identical nodes) when possible. 3575 We search for consecutive rows with the same nonzero structure, thereby 3576 reusing matrix information to achieve increased efficiency. 3577 3578 Options Database Keys: 3579 + -mat_no_inode - Do not use inodes 3580 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3581 - -mat_aij_oneindex - Internally use indexing starting at 1 3582 rather than 0. Note that when calling MatSetValues(), 3583 the user still MUST index entries starting at 0! 3584 3585 3586 Example usage: 3587 3588 Consider the following 8x8 matrix with 34 non-zero values, that is 3589 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3590 proc1 owns 3 rows, proc2 owns 2 rows.
This division can be shown 3591 as follows: 3592 3593 .vb 3594 1 2 0 | 0 3 0 | 0 4 3595 Proc0 0 5 6 | 7 0 0 | 8 0 3596 9 0 10 | 11 0 0 | 12 0 3597 ------------------------------------- 3598 13 0 14 | 15 16 17 | 0 0 3599 Proc1 0 18 0 | 19 20 21 | 0 0 3600 0 0 0 | 22 23 0 | 24 0 3601 ------------------------------------- 3602 Proc2 25 26 27 | 0 0 28 | 29 0 3603 30 0 0 | 31 32 33 | 0 34 3604 .ve 3605 3606 This can be represented as a collection of submatrices as: 3607 3608 .vb 3609 A B C 3610 D E F 3611 G H I 3612 .ve 3613 3614 Where the submatrices A,B,C are owned by proc0, D,E,F are 3615 owned by proc1, G,H,I are owned by proc2. 3616 3617 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3618 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3619 The 'M','N' parameters are 8,8, and have the same values on all procs. 3620 3621 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3622 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3623 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3624 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 3625 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 3626 matrix and [DF] as another SeqAIJ matrix. 3627 3628 When the d_nz, o_nz parameters are specified, d_nz storage elements are 3629 allocated for every row of the local diagonal submatrix, and o_nz 3630 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3631 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 3632 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 3633 In this case, the values of d_nz,o_nz are: 3634 .vb 3635 proc0 : d_nz = 2, o_nz = 2 3636 proc1 : d_nz = 3, o_nz = 2 3637 proc2 : d_nz = 1, o_nz = 4 3638 .ve 3639 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3640 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3641 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3642 34 values. 3643 3644 When the d_nnz, o_nnz parameters are specified, the storage is specified 3645 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 3646 In the above case the values for d_nnz,o_nnz are: 3647 .vb 3648 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3649 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3650 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3651 .ve 3652 Here the space allocated is the sum of all the above values, i.e. 34, and 3653 hence pre-allocation is perfect.
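   As a sketch, proc0 in the example above could create its share of the 8x8 matrix directly
   with the call below (comm is the communicator shared by the three processes; the array
   names are illustrative):
.vb
   Mat A;
   PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
   ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve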
3654 3655 Level: intermediate 3656 3657 .keywords: matrix, aij, compressed row, sparse, parallel 3658 3659 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3660 MPIAIJ, MatCreateMPIAIJWithArrays() 3661 @*/ 3662 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3663 { 3664 PetscErrorCode ierr; 3665 PetscMPIInt size; 3666 3667 PetscFunctionBegin; 3668 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3669 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3670 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3671 if (size > 1) { 3672 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3673 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3674 } else { 3675 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3676 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3677 } 3678 PetscFunctionReturn(0); 3679 } 3680 3681 #undef __FUNCT__ 3682 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3683 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3684 { 3685 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3686 3687 PetscFunctionBegin; 3688 *Ad = a->A; 3689 *Ao = a->B; 3690 *colmap = a->garray; 3691 PetscFunctionReturn(0); 3692 } 3693 3694 #undef __FUNCT__ 3695 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3696 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3697 { 3698 PetscErrorCode ierr; 3699 PetscInt i; 3700 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3701 3702 PetscFunctionBegin; 3703 if (coloring->ctype == IS_COLORING_GLOBAL) { 3704 ISColoringValue *allcolors,*colors; 3705 ISColoring ocoloring; 3706 3707 /* set coloring for diagonal portion */ 3708 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3709 3710 /* set coloring for off-diagonal portion */ 3711 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3712 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3713 for (i=0; i<a->B->cmap->n; i++) { 3714 colors[i] = allcolors[a->garray[i]]; 3715 } 3716 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3717 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3718 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3719 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3720 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3721 ISColoringValue *colors; 3722 PetscInt *larray; 3723 ISColoring ocoloring; 3724 3725 /* set coloring for diagonal portion */ 3726 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3727 for (i=0; i<a->A->cmap->n; i++) { 3728 larray[i] = i + A->cmap->rstart; 3729 } 3730 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3731 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3732 for (i=0; i<a->A->cmap->n; i++) { 3733 colors[i] = coloring->colors[larray[i]]; 3734 } 3735 ierr = PetscFree(larray);CHKERRQ(ierr); 3736 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3737 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3738 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3739 3740 /* set coloring for off-diagonal portion */ 3741 ierr = 
PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3742 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3743 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3744 for (i=0; i<a->B->cmap->n; i++) { 3745 colors[i] = coloring->colors[larray[i]]; 3746 } 3747 ierr = PetscFree(larray);CHKERRQ(ierr); 3748 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3749 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3750 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3751 } else { 3752 SETERRQ1(PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype); 3753 } 3754 3755 PetscFunctionReturn(0); 3756 } 3757 3758 #if defined(PETSC_HAVE_ADIC) 3759 #undef __FUNCT__ 3760 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3761 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3762 { 3763 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3764 PetscErrorCode ierr; 3765 3766 PetscFunctionBegin; 3767 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3768 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3769 PetscFunctionReturn(0); 3770 } 3771 #endif 3772 3773 #undef __FUNCT__ 3774 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3775 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3776 { 3777 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3778 PetscErrorCode ierr; 3779 3780 PetscFunctionBegin; 3781 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3782 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3783 PetscFunctionReturn(0); 3784 } 3785 3786 #undef __FUNCT__ 3787 #define __FUNCT__ "MatMerge" 3788 /*@ 3789 MatMerge - Creates a single large PETSc matrix by concatenating the sequential 3790 matrices from each processor 3791 3792 Collective on MPI_Comm 3793 3794 Input Parameters: 3795 + comm - the communicator the parallel matrix will live on 3796 . inmat - the input sequential matrix (one per process; it is destroyed by this routine) 3797 . n - number of local columns (or PETSC_DECIDE) 3798 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3799 3800 Output Parameter: 3801 . outmat - the parallel matrix generated 3802 3803 Level: advanced 3804 3805 Notes: The number of columns of the matrix in EACH processor MUST be the same.
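   A sketch of typical use (localmat and merged are illustrative names; as noted above, the
   input sequential matrix is destroyed once the parallel matrix has been assembled):
.vb
   Mat merged;
   ierr = MatMerge(comm,localmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&merged);CHKERRQ(ierr);
.ve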
3806 3807 @*/ 3808 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3809 { 3810 PetscErrorCode ierr; 3811 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3812 PetscInt *indx; 3813 PetscScalar *values; 3814 3815 PetscFunctionBegin; 3816 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3817 if (scall == MAT_INITIAL_MATRIX){ 3818 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3819 if (n == PETSC_DECIDE){ 3820 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3821 } 3822 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3823 rstart -= m; 3824 3825 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3826 for (i=0;i<m;i++) { 3827 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3828 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3829 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3830 } 3831 /* This routine will ONLY return MPIAIJ type matrix */ 3832 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3833 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3834 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3835 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3836 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3837 3838 } else if (scall == MAT_REUSE_MATRIX){ 3839 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3840 } else { 3841 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3842 } 3843 3844 for (i=0;i<m;i++) { 3845 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3846 Ii = i + rstart; 3847 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3848 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3849 } 3850 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3851 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3852 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3853 3854 PetscFunctionReturn(0); 3855 } 3856 3857 #undef __FUNCT__ 3858 #define __FUNCT__ "MatFileSplit" 3859 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3860 { 3861 PetscErrorCode ierr; 3862 PetscMPIInt rank; 3863 PetscInt m,N,i,rstart,nnz; 3864 size_t len; 3865 const PetscInt *indx; 3866 PetscViewer out; 3867 char *name; 3868 Mat B; 3869 const PetscScalar *values; 3870 3871 PetscFunctionBegin; 3872 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3873 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3874 /* Should this be the type of the diagonal block of A? 
*/ 3875 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3876 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3877 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3878 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3879 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3880 for (i=0;i<m;i++) { 3881 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3882 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3883 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3884 } 3885 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3886 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3887 3888 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 3889 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3890 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3891 sprintf(name,"%s.%d",outfile,rank); 3892 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3893 ierr = PetscFree(name);CHKERRQ(ierr); 3894 ierr = MatView(B,out);CHKERRQ(ierr); 3895 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3896 ierr = MatDestroy(B);CHKERRQ(ierr); 3897 PetscFunctionReturn(0); 3898 } 3899 3900 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3901 #undef __FUNCT__ 3902 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3903 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3904 { 3905 PetscErrorCode ierr; 3906 Mat_Merge_SeqsToMPI *merge; 3907 PetscContainer container; 3908 3909 PetscFunctionBegin; 3910 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3911 if (container) { 3912 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3913 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3914 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3915 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3916 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3917 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3918 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3919 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3920 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3921 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3922 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3923 ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr); 3924 3925 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 3926 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3927 /* free merge only when it was actually obtained from the container */ 3928 ierr = PetscFree(merge);CHKERRQ(ierr); 3929 } 3930 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3931 PetscFunctionReturn(0); 3932 } 3933 3934 #include "../src/mat/utils/freespace.h" 3935 #include "petscbt.h" 3936 3937 #undef __FUNCT__ 3938 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 3939 /*@C 3940 MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding the sequential 3941 matrices from each processor 3942 3943 Collective on MPI_Comm 3944 3945 Input Parameters: 3946 + comm - the communicator the parallel matrix will live on 3947 . seqmat - the input sequential matrix (one per process) 3948 . m - number of local rows (or PETSC_DECIDE) 3949 . n - number of local columns (or PETSC_DECIDE) 3950 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3951 3952 Output Parameter: 3953 . mpimat - the parallel matrix generated 3954 3955 Level: advanced 3956 3957 Notes: 3958 The dimensions of the sequential matrix in each processor MUST be the same. 3959 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 3960 destroyed when mpimat is destroyed.
Call PetscObjectQuery() to access seqmat. 3961 @*/ 3962 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 3963 { 3964 PetscErrorCode ierr; 3965 MPI_Comm comm=((PetscObject)mpimat)->comm; 3966 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3967 PetscMPIInt size,rank,taga,*len_s; 3968 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 3969 PetscInt proc,m; 3970 PetscInt **buf_ri,**buf_rj; 3971 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3972 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3973 MPI_Request *s_waits,*r_waits; 3974 MPI_Status *status; 3975 MatScalar *aa=a->a; 3976 MatScalar **abuf_r,*ba_i; 3977 Mat_Merge_SeqsToMPI *merge; 3978 PetscContainer container; 3979 3980 PetscFunctionBegin; 3981 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3982 3983 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3984 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3985 3986 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3987 if (container) { 3988 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3989 } 3990 bi = merge->bi; 3991 bj = merge->bj; 3992 buf_ri = merge->buf_ri; 3993 buf_rj = merge->buf_rj; 3994 3995 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3996 owners = merge->rowmap.range; 3997 len_s = merge->len_s; 3998 3999 /* send and recv matrix values */ 4000 /*-----------------------------*/ 4001 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4002 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4003 4004 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4005 for (proc=0,k=0; proc<size; proc++){ 4006 if (!len_s[proc]) continue; 4007 i = owners[proc]; 4008 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4009 k++; 4010 } 4011 4012 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4013 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4014 ierr = PetscFree(status);CHKERRQ(ierr); 4015 4016 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4017 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4018 4019 /* insert mat values of mpimat */ 4020 /*----------------------------*/ 4021 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4022 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 4023 nextrow = buf_ri_k + merge->nrecv; 4024 nextai = nextrow + merge->nrecv; 4025 4026 for (k=0; k<merge->nrecv; k++){ 4027 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4028 nrows = *(buf_ri_k[k]); 4029 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4030 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4031 } 4032 4033 /* set values of ba */ 4034 m = merge->rowmap.n; 4035 for (i=0; i<m; i++) { 4036 arow = owners[rank] + i; 4037 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4038 bnzi = bi[i+1] - bi[i]; 4039 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4040 4041 /* add local non-zero vals of this proc's seqmat into ba */ 4042 anzi = ai[arow+1] - ai[arow]; 4043 aj = a->j + ai[arow]; 4044 aa = a->a + ai[arow]; 4045 nextaj = 0; 4046 for (j=0; nextaj<anzi; j++){ 4047 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4048 ba_i[j] += aa[nextaj++]; 4049 } 4050 } 4051 4052 /* add 
received vals into ba */ 4053 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4054 /* i-th row */ 4055 if (i == *nextrow[k]) { 4056 anzi = *(nextai[k]+1) - *nextai[k]; 4057 aj = buf_rj[k] + *(nextai[k]); 4058 aa = abuf_r[k] + *(nextai[k]); 4059 nextaj = 0; 4060 for (j=0; nextaj<anzi; j++){ 4061 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4062 ba_i[j] += aa[nextaj++]; 4063 } 4064 } 4065 nextrow[k]++; nextai[k]++; 4066 } 4067 } 4068 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4069 } 4070 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4071 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4072 4073 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4074 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4075 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 4076 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4077 PetscFunctionReturn(0); 4078 } 4079 4080 #undef __FUNCT__ 4081 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4082 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4083 { 4084 PetscErrorCode ierr; 4085 Mat B_mpi; 4086 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4087 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4088 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4089 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4090 PetscInt len,proc,*dnz,*onz; 4091 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4092 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4093 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4094 MPI_Status *status; 4095 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4096 PetscBT lnkbt; 4097 Mat_Merge_SeqsToMPI *merge; 4098 PetscContainer container; 4099 4100 PetscFunctionBegin; 4101 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4102 4103 /* make sure it is a PETSc comm */ 4104 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4105 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4106 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4107 4108 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4109 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4110 4111 /* determine row ownership */ 4112 /*---------------------------------------------------------*/ 4113 ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 4114 merge->rowmap.n = m; 4115 merge->rowmap.N = M; 4116 merge->rowmap.bs = 1; 4117 ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr); 4118 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4119 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4120 4121 m = merge->rowmap.n; 4122 M = merge->rowmap.N; 4123 owners = merge->rowmap.range; 4124 4125 /* determine the number of messages to send, their lengths */ 4126 /*---------------------------------------------------------*/ 4127 len_s = merge->len_s; 4128 4129 len = 0; /* length of buf_si[] */ 4130 merge->nsend = 0; 4131 for (proc=0; proc<size; proc++){ 4132 len_si[proc] = 0; 4133 if (proc == rank){ 4134 len_s[proc] = 0; 4135 } else { 4136 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4137 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4138 } 4139 if (len_s[proc]) { 4140 merge->nsend++; 4141 nrows = 0; 4142 for (i=owners[proc]; i<owners[proc+1]; i++){ 4143 if (ai[i+1] > ai[i]) nrows++; 4144 } 4145 len_si[proc] = 2*(nrows+1); 4146 len += 
len_si[proc]; 4147 } 4148 } 4149 4150 /* determine the number and length of messages to receive for ij-structure */ 4151 /*-------------------------------------------------------------------------*/ 4152 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4153 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4154 4155 /* post the Irecv of j-structure */ 4156 /*-------------------------------*/ 4157 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4158 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4159 4160 /* post the Isend of j-structure */ 4161 /*--------------------------------*/ 4162 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 4163 sj_waits = si_waits + merge->nsend; 4164 4165 for (proc=0, k=0; proc<size; proc++){ 4166 if (!len_s[proc]) continue; 4167 i = owners[proc]; 4168 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4169 k++; 4170 } 4171 4172 /* receives and sends of j-structure are complete */ 4173 /*------------------------------------------------*/ 4174 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4175 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4176 4177 /* send and recv i-structure */ 4178 /*---------------------------*/ 4179 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4180 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4181 4182 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4183 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4184 for (proc=0,k=0; proc<size; proc++){ 4185 if (!len_s[proc]) continue; 4186 /* form outgoing message for i-structure: 4187 buf_si[0]: nrows to be sent 4188 [1:nrows]: row index (global) 4189 [nrows+1:2*nrows+1]: i-structure index 4190 */ 4191 /*-------------------------------------------*/ 4192 nrows = len_si[proc]/2 - 1; 4193 buf_si_i = buf_si + nrows+1; 4194 buf_si[0] = nrows; 4195 buf_si_i[0] = 0; 4196 nrows = 0; 4197 for (i=owners[proc]; i<owners[proc+1]; i++){ 4198 anzi = ai[i+1] - ai[i]; 4199 if (anzi) { 4200 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4201 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4202 nrows++; 4203 } 4204 } 4205 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4206 k++; 4207 buf_si += len_si[proc]; 4208 } 4209 4210 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4211 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4212 4213 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4214 for (i=0; i<merge->nrecv; i++){ 4215 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4216 } 4217 4218 ierr = PetscFree(len_si);CHKERRQ(ierr); 4219 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4220 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4221 ierr = PetscFree(si_waits);CHKERRQ(ierr); 4222 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4223 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4224 ierr = PetscFree(status);CHKERRQ(ierr); 4225 4226 /* compute a local seq matrix in each processor */ 4227 /*----------------------------------------------*/ 4228 /* allocate bi array and 
free space for accumulating nonzero column info */ 4229 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4230 bi[0] = 0; 4231 4232 /* create and initialize a linked list */ 4233 nlnk = N+1; 4234 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4235 4236 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4237 len = 0; 4238 len = ai[owners[rank+1]] - ai[owners[rank]]; 4239 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4240 current_space = free_space; 4241 4242 /* determine symbolic info for each local row */ 4243 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 4244 nextrow = buf_ri_k + merge->nrecv; 4245 nextai = nextrow + merge->nrecv; 4246 for (k=0; k<merge->nrecv; k++){ 4247 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 4248 nrows = *buf_ri_k[k]; 4249 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */ 4250 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of the k-th received i-structure */ 4251 } 4252 4253 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4254 len = 0; 4255 for (i=0;i<m;i++) { 4256 bnzi = 0; 4257 /* add local non-zero cols of this proc's seqmat into lnk */ 4258 arow = owners[rank] + i; 4259 anzi = ai[arow+1] - ai[arow]; 4260 aj = a->j + ai[arow]; 4261 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4262 bnzi += nlnk; 4263 /* add received col data into lnk */ 4264 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4265 if (i == *nextrow[k]) { /* i-th row */ 4266 anzi = *(nextai[k]+1) - *nextai[k]; 4267 aj = buf_rj[k] + *nextai[k]; 4268 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4269 bnzi += nlnk; 4270 nextrow[k]++; nextai[k]++; 4271 } 4272 } 4273 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4274 4275 /* if free space is not available, make more free space */ 4276 if (current_space->local_remaining<bnzi) { 4277 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr); 4278 nspacedouble++; 4279 } 4280 /* copy data into free space, then initialize lnk */ 4281 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4282 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4283 4284 current_space->array += bnzi; 4285 current_space->local_used += bnzi; 4286 current_space->local_remaining -= bnzi; 4287 4288 bi[i+1] = bi[i] + bnzi; 4289 } 4290 4291 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 4292 4293 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4294 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4295 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4296 4297 /* create symbolic parallel matrix B_mpi */ 4298 /*---------------------------------------*/ 4299 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4300 if (n==PETSC_DECIDE) { 4301 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4302 } else { 4303 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4304 } 4305 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4306 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4307 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4308 4309 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4310 B_mpi->assembled = PETSC_FALSE; 4311 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4312 merge->bi = bi; 4313 merge->bj = bj; 4314 merge->buf_ri = buf_ri; 4315 merge->buf_rj =
buf_rj; 4316 merge->coi = PETSC_NULL; 4317 merge->coj = PETSC_NULL; 4318 merge->owners_co = PETSC_NULL; 4319 4320 /* attach the supporting struct to B_mpi for reuse */ 4321 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4322 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4323 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4324 *mpimat = B_mpi; 4325 4326 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4327 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4328 PetscFunctionReturn(0); 4329 } 4330 4331 #undef __FUNCT__ 4332 #define __FUNCT__ "MatMerge_SeqsToMPI" 4333 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4334 { 4335 PetscErrorCode ierr; 4336 4337 PetscFunctionBegin; 4338 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4339 if (scall == MAT_INITIAL_MATRIX){ 4340 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4341 } 4342 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4343 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4344 PetscFunctionReturn(0); 4345 } 4346 4347 #undef __FUNCT__ 4348 #define __FUNCT__ "MatGetLocalMat" 4349 /*@ 4350 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4351 4352 Not Collective 4353 4354 Input Parameters: 4355 + A - the matrix 4356 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4357 4358 Output Parameter: 4359 . A_loc - the local sequential matrix generated 4360 4361 Level: developer 4362 4363 @*/ 4364 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4365 { 4366 PetscErrorCode ierr; 4367 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4368 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4369 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4370 MatScalar *aa=a->a,*ba=b->a,*cam; 4371 PetscScalar *ca; 4372 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4373 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4374 4375 PetscFunctionBegin; 4376 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4377 if (scall == MAT_INITIAL_MATRIX){ 4378 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4379 ci[0] = 0; 4380 for (i=0; i<am; i++){ 4381 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4382 } 4383 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4384 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4385 k = 0; 4386 for (i=0; i<am; i++) { 4387 ncols_o = bi[i+1] - bi[i]; 4388 ncols_d = ai[i+1] - ai[i]; 4389 /* off-diagonal portion of A */ 4390 for (jo=0; jo<ncols_o; jo++) { 4391 col = cmap[*bj]; 4392 if (col >= cstart) break; 4393 cj[k] = col; bj++; 4394 ca[k++] = *ba++; 4395 } 4396 /* diagonal portion of A */ 4397 for (j=0; j<ncols_d; j++) { 4398 cj[k] = cstart + *aj++; 4399 ca[k++] = *aa++; 4400 } 4401 /* off-diagonal portion of A */ 4402 for (j=jo; j<ncols_o; j++) { 4403 cj[k] = cmap[*bj++]; 4404 ca[k++] = *ba++; 4405 } 4406 } 4407 /* put together the new matrix */ 4408 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4409 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4410 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 4411 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4412 mat->free_a = PETSC_TRUE; 4413 mat->free_ij = PETSC_TRUE; 4414 mat->nonew = 0; 4415 } else if (scall == MAT_REUSE_MATRIX){ 4416 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4417 ci = mat->i; cj = mat->j; cam = mat->a; 4418 for (i=0; i<am; i++) { 4419 /* off-diagonal portion of A */ 4420 ncols_o = bi[i+1] - bi[i]; 4421 for (jo=0; jo<ncols_o; jo++) { 4422 col = cmap[*bj]; 4423 if (col >= cstart) break; 4424 *cam++ = *ba++; bj++; 4425 } 4426 /* diagonal portion of A */ 4427 ncols_d = ai[i+1] - ai[i]; 4428 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4429 /* off-diagonal portion of A */ 4430 for (j=jo; j<ncols_o; j++) { 4431 *cam++ = *ba++; bj++; 4432 } 4433 } 4434 } else { 4435 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4436 } 4437 4438 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4439 PetscFunctionReturn(0); 4440 } 4441 4442 #undef __FUNCT__ 4443 #define __FUNCT__ "MatGetLocalMatCondensed" 4444 /*@C 4445 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4446 4447 Not Collective 4448 4449 Input Parameters: 4450 + A - the matrix 4451 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4452 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4453 4454 Output Parameter: 4455 . A_loc - the local sequential matrix generated 4456 4457 Level: developer 4458 4459 @*/ 4460 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4461 { 4462 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4463 PetscErrorCode ierr; 4464 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4465 IS isrowa,iscola; 4466 Mat *aloc; 4467 4468 PetscFunctionBegin; 4469 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4470 if (!row){ 4471 start = A->rmap->rstart; end = A->rmap->rend; 4472 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4473 } else { 4474 isrowa = *row; 4475 } 4476 if (!col){ 4477 start = A->cmap->rstart; 4478 cmap = a->garray; 4479 nzA = a->A->cmap->n; 4480 nzB = a->B->cmap->n; 4481 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4482 ncols = 0; 4483 for (i=0; i<nzB; i++) { 4484 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4485 else break; 4486 } 4487 imark = i; 4488 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4489 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4490 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4491 ierr = PetscFree(idx);CHKERRQ(ierr); 4492 } else { 4493 iscola = *col; 4494 } 4495 if (scall != MAT_INITIAL_MATRIX){ 4496 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4497 aloc[0] = *A_loc; 4498 } 4499 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4500 *A_loc = aloc[0]; 4501 ierr = PetscFree(aloc);CHKERRQ(ierr); 4502 if (!row){ 4503 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4504 } 4505 if (!col){ 4506 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4507 } 4508 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4509 PetscFunctionReturn(0); 4510 } 4511 4512 #undef __FUNCT__ 4513 #define __FUNCT__ "MatGetBrowsOfAcols" 4514 /*@C 4515 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4516 4517 Collective on Mat 4518 4519 Input Parameters: 4520 + A,B - the matrices in mpiaij format 4521 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4522 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4523 4524 Output Parameter: 4525 + rowb, colb - index sets of rows and columns of B to extract 4526 . brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows 4527 - B_seq - the sequential matrix generated 4528 4529 Level: developer 4530 4531 @*/ 4532 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4533 { 4534 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4535 PetscErrorCode ierr; 4536 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4537 IS isrowb,iscolb; 4538 Mat *bseq; 4539 4540 PetscFunctionBegin; 4541 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4542 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4543 } 4544 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4545 4546 if (scall == MAT_INITIAL_MATRIX){ 4547 start = A->cmap->rstart; 4548 cmap = a->garray; 4549 nzA = a->A->cmap->n; 4550 nzB = a->B->cmap->n; 4551 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4552 ncols = 0; 4553 for (i=0; i<nzB; i++) { /* row < local row index */ 4554 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4555 else break; 4556 } 4557 imark = i; 4558 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4559 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4560 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4561 ierr = PetscFree(idx);CHKERRQ(ierr); 4562 *brstart = imark; 4563 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4564 } else { 4565 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4566 isrowb = *rowb; iscolb = *colb; 4567 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4568 bseq[0] = *B_seq; 4569 } 4570 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4571 *B_seq = bseq[0]; 4572 ierr = PetscFree(bseq);CHKERRQ(ierr); 4573 if (!rowb){ 4574 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4575 } else { 4576 *rowb = isrowb; 4577 } 4578 if (!colb){ 4579 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4580 } else { 4581 *colb = iscolb; 4582 } 4583 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4584 PetscFunctionReturn(0); 4585 } 4586 4587 #undef __FUNCT__ 4588 #define __FUNCT__ "MatGetBrowsOfAoCols" 4589 /*@C 4590 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4591 of the OFF-DIAGONAL portion of local A 4592 4593 Collective on Mat 4594 4595 Input Parameters: 4596 + A,B - the matrices in mpiaij format 4597 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4598 . 
startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4599 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4600 4601 Output Parameter: 4602 + B_oth - the sequential matrix generated 4603 4604 Level: developer 4605 4606 @*/ 4607 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth) 4608 { 4609 VecScatter_MPI_General *gen_to,*gen_from; 4610 PetscErrorCode ierr; 4611 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4612 Mat_SeqAIJ *b_oth; 4613 VecScatter ctx=a->Mvctx; 4614 MPI_Comm comm=((PetscObject)ctx)->comm; 4615 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4616 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4617 PetscScalar *rvalues,*svalues; 4618 MatScalar *b_otha,*bufa,*bufA; 4619 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4620 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4621 MPI_Status *sstatus,rstatus; 4622 PetscMPIInt jj; 4623 PetscInt *cols,sbs,rbs; 4624 PetscScalar *vals; 4625 4626 PetscFunctionBegin; 4627 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4628 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4629 } 4630 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4631 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4632 4633 gen_to = (VecScatter_MPI_General*)ctx->todata; 4634 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4635 rvalues = gen_from->values; /* holds the length of receiving row */ 4636 svalues = gen_to->values; /* holds the length of sending row */ 4637 nrecvs = gen_from->n; 4638 nsends = gen_to->n; 4639 4640 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4641 srow = gen_to->indices; /* local row index to be sent */ 4642 sstarts = gen_to->starts; 4643 sprocs = gen_to->procs; 4644 sstatus = gen_to->sstatus; 4645 sbs = gen_to->bs; 4646 rstarts = gen_from->starts; 4647 rprocs = gen_from->procs; 4648 rbs = gen_from->bs; 4649 4650 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4651 if (scall == MAT_INITIAL_MATRIX){ 4652 /* i-array */ 4653 /*---------*/ 4654 /* post receives */ 4655 for (i=0; i<nrecvs; i++){ 4656 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4657 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4658 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4659 } 4660 4661 /* pack the outgoing message */ 4662 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 4663 rstartsj = sstartsj + nsends +1; 4664 sstartsj[0] = 0; rstartsj[0] = 0; 4665 len = 0; /* total length of j or a array to be sent */ 4666 k = 0; 4667 for (i=0; i<nsends; i++){ 4668 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4669 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4670 for (j=0; j<nrows; j++) { 4671 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4672 for (l=0; l<sbs; l++){ 4673 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4674 rowlen[j*sbs+l] = ncols; 4675 len += ncols; 4676 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4677 } 4678 k++; 4679 } 4680 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4681 
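/* len now counts every column/value entry packed for messages 0..i, so the prefix sums recorded in sstartsj[] (and, for the receives, rstartsj[]) give each message's offset into bufj and bufa */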
sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4682 } 4683 /* recvs and sends of i-array are completed */ 4684 i = nrecvs; 4685 while (i--) { 4686 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4687 } 4688 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4689 4690 /* allocate buffers for sending j and a arrays */ 4691 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4692 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4693 4694 /* create i-array of B_oth */ 4695 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4696 b_othi[0] = 0; 4697 len = 0; /* total length of j or a array to be received */ 4698 k = 0; 4699 for (i=0; i<nrecvs; i++){ 4700 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4701 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 4702 for (j=0; j<nrows; j++) { 4703 b_othi[k+1] = b_othi[k] + rowlen[j]; 4704 len += rowlen[j]; k++; 4705 } 4706 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4707 } 4708 4709 /* allocate space for j and a arrays of B_oth */ 4710 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4711 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 4712 4713 /* j-array */ 4714 /*---------*/ 4715 /* post receives of j-array */ 4716 for (i=0; i<nrecvs; i++){ 4717 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4718 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4719 } 4720 4721 /* pack the outgoing message j-array */ 4722 k = 0; 4723 for (i=0; i<nsends; i++){ 4724 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4725 bufJ = bufj+sstartsj[i]; 4726 for (j=0; j<nrows; j++) { 4727 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4728 for (ll=0; ll<sbs; ll++){ 4729 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4730 for (l=0; l<ncols; l++){ 4731 *bufJ++ = cols[l]; 4732 } 4733 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4734 } 4735 } 4736 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4737 } 4738 4739 /* recvs and sends of j-array are completed */ 4740 i = nrecvs; 4741 while (i--) { 4742 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4743 } 4744 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4745 } else if (scall == MAT_REUSE_MATRIX){ 4746 sstartsj = *startsj; 4747 rstartsj = sstartsj + nsends +1; 4748 bufa = *bufa_ptr; 4749 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4750 b_otha = b_oth->a; 4751 } else { 4752 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4753 } 4754 4755 /* a-array */ 4756 /*---------*/ 4757 /* post receives of a-array */ 4758 for (i=0; i<nrecvs; i++){ 4759 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4760 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4761 } 4762 4763 /* pack the outgoing message a-array */ 4764 k = 0; 4765 for (i=0; i<nsends; i++){ 4766 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4767 bufA = bufa+sstartsj[i]; 4768 for (j=0; j<nrows; j++) { 4769 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4770 for (ll=0; ll<sbs; ll++){ 4771 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4772 for (l=0; l<ncols; l++){
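/* append this row's numerical values to the contiguous send buffer bufa */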
4773 *bufA++ = vals[l]; 4774 } 4775 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4776 } 4777 } 4778 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4779 } 4780 /* recvs and sends of a-array are completed */ 4781 i = nrecvs; 4782 while (i--) { 4783 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4784 } 4785 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4786 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4787 4788 if (scall == MAT_INITIAL_MATRIX){ 4789 /* put together the new matrix */ 4790 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4791 4792 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4793 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4794 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4795 b_oth->free_a = PETSC_TRUE; 4796 b_oth->free_ij = PETSC_TRUE; 4797 b_oth->nonew = 0; 4798 4799 ierr = PetscFree(bufj);CHKERRQ(ierr); 4800 if (!startsj || !bufa_ptr){ /* the caller does not want these buffers kept for reuse */ 4801 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 4802 ierr = PetscFree(bufa);CHKERRQ(ierr); 4803 } else { 4804 *startsj = sstartsj; 4805 *bufa_ptr = bufa; 4806 } 4807 } 4808 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4809 PetscFunctionReturn(0); 4810 } 4811 4812 #undef __FUNCT__ 4813 #define __FUNCT__ "MatGetCommunicationStructs" 4814 /*@C 4815 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4816 4817 Not Collective 4818 4819 Input Parameter: 4820 . A - The matrix in mpiaij format 4821 4822 Output Parameters: 4823 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4824 .
colmap - A map from global column index to local index into lvec 4825 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4826 4827 Level: developer 4828 4829 @*/ 4830 #if defined (PETSC_USE_CTABLE) 4831 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4832 #else 4833 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4834 #endif 4835 { 4836 Mat_MPIAIJ *a; 4837 4838 PetscFunctionBegin; 4839 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 4840 PetscValidPointer(lvec, 2) 4841 PetscValidPointer(colmap, 3) 4842 PetscValidPointer(multScatter, 4) 4843 a = (Mat_MPIAIJ *) A->data; 4844 if (lvec) *lvec = a->lvec; 4845 if (colmap) *colmap = a->colmap; 4846 if (multScatter) *multScatter = a->Mvctx; 4847 PetscFunctionReturn(0); 4848 } 4849 4850 EXTERN_C_BEGIN 4851 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*); 4852 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*); 4853 EXTERN_C_END 4854 4855 #include "../src/mat/impls/dense/mpi/mpidense.h" 4856 4857 #undef __FUNCT__ 4858 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4859 /* 4860 Computes (B'*A')' since computing B*A directly is untenable 4861 4862 n p p 4863 ( ) ( ) ( ) 4864 m ( A ) * n ( B ) = m ( C ) 4865 ( ) ( ) ( ) 4866 4867 */ 4868 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4869 { 4870 PetscErrorCode ierr; 4871 Mat At,Bt,Ct; 4872 4873 PetscFunctionBegin; 4874 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4875 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4876 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4877 ierr = MatDestroy(At);CHKERRQ(ierr); 4878 ierr = MatDestroy(Bt);CHKERRQ(ierr); 4879 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4880 ierr = MatDestroy(Ct);CHKERRQ(ierr); 4881 PetscFunctionReturn(0); 4882 } 4883 4884 #undef __FUNCT__ 4885 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4886 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4887 { 4888 PetscErrorCode ierr; 4889 PetscInt m=A->rmap->n,n=B->cmap->n; 4890 Mat Cmat; 4891 4892 PetscFunctionBegin; 4893 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 4894 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 4895 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4896 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 4897 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 4898 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4899 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4900 *C = Cmat; 4901 PetscFunctionReturn(0); 4902 } 4903 4904 /* ----------------------------------------------------------------*/ 4905 #undef __FUNCT__ 4906 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 4907 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 4908 { 4909 PetscErrorCode ierr; 4910 4911 PetscFunctionBegin; 4912 if (scall == MAT_INITIAL_MATRIX){ 4913 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 4914 } 4915 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 4916 PetscFunctionReturn(0); 4917 } 4918 4919 EXTERN_C_BEGIN 4920 #if defined(PETSC_HAVE_MUMPS) 4921 extern 
PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*); 4922 #endif 4923 #if defined(PETSC_HAVE_PASTIX) 4924 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 4925 #endif 4926 #if defined(PETSC_HAVE_SUPERLU_DIST) 4927 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 4928 #endif 4929 #if defined(PETSC_HAVE_SPOOLES) 4930 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 4931 #endif 4932 EXTERN_C_END 4933 4934 /*MC 4935 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 4936 4937 Options Database Keys: 4938 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 4939 4940 Level: beginner 4941 4942 .seealso: MatCreateMPIAIJ() 4943 M*/ 4944 4945 EXTERN_C_BEGIN 4946 #undef __FUNCT__ 4947 #define __FUNCT__ "MatCreate_MPIAIJ" 4948 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 4949 { 4950 Mat_MPIAIJ *b; 4951 PetscErrorCode ierr; 4952 PetscMPIInt size; 4953 4954 PetscFunctionBegin; 4955 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 4956 4957 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 4958 B->data = (void*)b; 4959 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 4960 B->rmap->bs = 1; 4961 B->assembled = PETSC_FALSE; 4962 B->mapping = 0; 4963 4964 B->insertmode = NOT_SET_VALUES; 4965 b->size = size; 4966 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 4967 4968 /* build cache for off array entries formed */ 4969 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 4970 b->donotstash = PETSC_FALSE; 4971 b->colmap = 0; 4972 b->garray = 0; 4973 b->roworiented = PETSC_TRUE; 4974 4975 /* stuff used for matrix vector multiply */ 4976 b->lvec = PETSC_NULL; 4977 b->Mvctx = PETSC_NULL; 4978 4979 /* stuff for MatGetRow() */ 4980 b->rowindices = 0; 4981 b->rowvalues = 0; 4982 b->getrowactive = PETSC_FALSE; 4983 4984 #if defined(PETSC_HAVE_SPOOLES) 4985 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_spooles_C", 4986 "MatGetFactor_mpiaij_spooles", 4987 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 4988 #endif 4989 #if defined(PETSC_HAVE_MUMPS) 4990 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_mumps_C", 4991 "MatGetFactor_mpiaij_mumps", 4992 MatGetFactor_mpiaij_mumps);CHKERRQ(ierr); 4993 #endif 4994 #if defined(PETSC_HAVE_PASTIX) 4995 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_pastix_C", 4996 "MatGetFactor_mpiaij_pastix", 4997 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 4998 #endif 4999 #if defined(PETSC_HAVE_SUPERLU_DIST) 5000 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_superlu_dist_C", 5001 "MatGetFactor_mpiaij_superlu_dist", 5002 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5003 #endif 5004 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5005 "MatStoreValues_MPIAIJ", 5006 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5007 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5008 "MatRetrieveValues_MPIAIJ", 5009 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5010 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5011 "MatGetDiagonalBlock_MPIAIJ", 5012 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5013 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5014 "MatIsTranspose_MPIAIJ", 5015 
MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5016 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5017 "MatMPIAIJSetPreallocation_MPIAIJ", 5018 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5019 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5020 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5021 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5022 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5023 "MatDiagonalScaleLocal_MPIAIJ", 5024 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5025 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C", 5026 "MatConvert_MPIAIJ_MPICSRPERM", 5027 MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr); 5028 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C", 5029 "MatConvert_MPIAIJ_MPICRL", 5030 MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr); 5031 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5032 "MatMatMult_MPIDense_MPIAIJ", 5033 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5034 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5035 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5036 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5037 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5038 "MatMatMultNumeric_MPIDense_MPIAIJ", 5039 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5040 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5041 PetscFunctionReturn(0); 5042 } 5043 EXTERN_C_END 5044 5045 #undef __FUNCT__ 5046 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5047 /*@ 5048 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5049 and "off-diagonal" part of the matrix in CSR format. 5050 5051 Collective on MPI_Comm 5052 5053 Input Parameters: 5054 + comm - MPI communicator 5055 . m - number of local rows (Cannot be PETSC_DECIDE) 5056 . n - This value should be the same as the local size used in creating the 5057 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5058 calculated if N is given) For square matrices n is almost always m. 5059 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5060 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5061 . i - row indices for "diagonal" portion of matrix 5062 . j - column indices 5063 . a - matrix values 5064 . oi - row indices for "off-diagonal" portion of matrix 5065 . oj - column indices 5066 - oa - matrix values 5067 5068 Output Parameter: 5069 . mat - the matrix 5070 5071 Level: advanced 5072 5073 Notes: 5074 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 
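   Example usage, given only as a minimal sketch (one process, a 2x2 matrix whose nonzeros all lie in the "diagonal" block; the off-diagonal arrays oii, ojj, oaa below are illustrative placeholders and are never read because that block is empty):

      PetscInt    ii[]  = {0,1,2};
      PetscInt    jj[]  = {0,1};
      PetscScalar aa[]  = {1.0,2.0};
      PetscInt    oii[] = {0,0,0};
      PetscInt    ojj[] = {0};
      PetscScalar oaa[] = {0.0};
      Mat         A;

      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,
                                            ii,jj,aa,oii,ojj,oaa,&A);CHKERRQ(ierr);

   Since the arrays are not copied, they must remain valid (and must not be freed) for as long as A is in use.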
5075 5076 The i and j indices are 0 based 5077 5078 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5079 5080 5081 .keywords: matrix, aij, compressed row, sparse, parallel 5082 5083 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5084 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5085 @*/ 5086 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5087 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5088 { 5089 PetscErrorCode ierr; 5090 Mat_MPIAIJ *maij; 5091 5092 PetscFunctionBegin; 5093 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5094 if (i[0]) { 5095 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5096 } 5097 if (oi[0]) { 5098 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5099 } 5100 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5101 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5102 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5103 maij = (Mat_MPIAIJ*) (*mat)->data; 5104 maij->donotstash = PETSC_TRUE; 5105 (*mat)->preallocated = PETSC_TRUE; 5106 5107 ierr = PetscMapSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5108 ierr = PetscMapSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5109 ierr = PetscMapSetUp((*mat)->rmap);CHKERRQ(ierr); 5110 ierr = PetscMapSetUp((*mat)->cmap);CHKERRQ(ierr); 5111 5112 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5113 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5114 5115 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5116 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5117 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5118 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5119 5120 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5121 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5122 PetscFunctionReturn(0); 5123 } 5124 5125 /* 5126 Special version for direct calls from Fortran 5127 */ 5128 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5129 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5130 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5131 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5132 #endif 5133 5134 /* Change these macros so can be used in void function */ 5135 #undef CHKERRQ 5136 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5137 #undef SETERRQ2 5138 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5139 #undef SETERRQ 5140 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5141 5142 EXTERN_C_BEGIN 5143 #undef __FUNCT__ 5144 #define __FUNCT__ "matsetvaluesmpiaij_" 5145 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5146 { 5147 Mat mat = *mmat; 5148 PetscInt m = *mm, n = *mn; 5149 InsertMode addv = *maddv; 5150 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5151 PetscScalar value; 5152 PetscErrorCode ierr; 5153 5154 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5155 if (mat->insertmode == NOT_SET_VALUES) { 5156 mat->insertmode = addv; 5157 } 5158 #if defined(PETSC_USE_DEBUG) 5159 else if (mat->insertmode != addv) { 5160 
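/* once assembly has begun with one InsertMode, mixing ADD_VALUES and INSERT_VALUES is an error (checked only in debug builds) */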
SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5161 } 5162 #endif 5163 { 5164 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5165 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5166 PetscTruth roworiented = aij->roworiented; 5167 5168 /* Some Variables required in the macro */ 5169 Mat A = aij->A; 5170 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5171 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5172 MatScalar *aa = a->a; 5173 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5174 Mat B = aij->B; 5175 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5176 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5177 MatScalar *ba = b->a; 5178 5179 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5180 PetscInt nonew = a->nonew; 5181 MatScalar *ap1,*ap2; 5182 5183 PetscFunctionBegin; 5184 for (i=0; i<m; i++) { 5185 if (im[i] < 0) continue; 5186 #if defined(PETSC_USE_DEBUG) 5187 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5188 #endif 5189 if (im[i] >= rstart && im[i] < rend) { 5190 row = im[i] - rstart; 5191 lastcol1 = -1; 5192 rp1 = aj + ai[row]; 5193 ap1 = aa + ai[row]; 5194 rmax1 = aimax[row]; 5195 nrow1 = ailen[row]; 5196 low1 = 0; 5197 high1 = nrow1; 5198 lastcol2 = -1; 5199 rp2 = bj + bi[row]; 5200 ap2 = ba + bi[row]; 5201 rmax2 = bimax[row]; 5202 nrow2 = bilen[row]; 5203 low2 = 0; 5204 high2 = nrow2; 5205 5206 for (j=0; j<n; j++) { 5207 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5208 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5209 if (in[j] >= cstart && in[j] < cend){ 5210 col = in[j] - cstart; 5211 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5212 } else if (in[j] < 0) continue; 5213 #if defined(PETSC_USE_DEBUG) 5214 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 5215 #endif 5216 else { 5217 if (mat->was_assembled) { 5218 if (!aij->colmap) { 5219 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5220 } 5221 #if defined (PETSC_USE_CTABLE) 5222 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5223 col--; 5224 #else 5225 col = aij->colmap[in[j]] - 1; 5226 #endif 5227 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5228 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5229 col = in[j]; 5230 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5231 B = aij->B; 5232 b = (Mat_SeqAIJ*)B->data; 5233 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5234 rp2 = bj + bi[row]; 5235 ap2 = ba + bi[row]; 5236 rmax2 = bimax[row]; 5237 nrow2 = bilen[row]; 5238 low2 = 0; 5239 high2 = nrow2; 5240 bm = aij->B->rmap->n; 5241 ba = b->a; 5242 } 5243 } else col = in[j]; 5244 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5245 } 5246 } 5247 } else { 5248 if (!aij->donotstash) { 5249 if (roworiented) { 5250 if (ignorezeroentries && v[i*n] == 0.0) continue; 5251 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 5252 } else { 5253 if (ignorezeroentries && v[i] == 0.0) continue; 5254 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 5255 } 5256 } 5257 } 5258 }} 5259 PetscFunctionReturnVoid(); 5260 } 5261 EXTERN_C_END 5262 5263
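/*
   The Fortran entry point above is a specialized, directly callable version of MatSetValues() for MPIAIJ
   matrices. For reference, a C-side sketch of the equivalent call, illustrative only and assuming mat is a
   previously created and preallocated MPIAIJ matrix (Fortran callers pass every argument by reference and
   rely on the name-mangling macros defined above):

      PetscInt     row = 0, col = 0;
      PetscScalar  v   = 1.0;

      ierr = MatSetValues(mat,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
*/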