#define PETSCMAT_DLL

#include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "../src/inline/spops.h"

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscTruth     aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
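      /* ld[i] will record how many off-diagonal entries of row i lie to the
         left of the diagonal block; the array is stashed in the Mat_MPIAIJ so
         that the MAT_REUSE_MATRIX path below can split incoming values between
         the A (diagonal) and B (off-diagonal) parts without redoing this
         column analysis */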
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
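/*
   A minimal usage sketch (the names gseq and Adist are hypothetical): process
   0 holds a sequential matrix and every process passes the number of rows m
   it is to own.

     Mat Adist;
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,m,MAT_INITIAL_MATRIX,&Adist);CHKERRQ(ierr);
     ...
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,m,MAT_REUSE_MATRIX,&Adist);CHKERRQ(ierr);

   Only process 0 need supply a valid MATSEQAIJ gmat; the reuse call moves
   over just the numerical values.
*/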
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable
  at a slightly higher hash table cost; without it it is not scalable
  (each process has an order-N integer array) but it is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
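/*
   The two macros above insert a single (row,col,value) pair into the diagonal
   (A) or off-diagonal (B) SeqAIJ block: a monotonicity hint plus binary search
   locates the column; an existing entry is added to or overwritten in place;
   otherwise, unless new nonzeros are disabled, the row is grown (reallocating
   via MatSeqXAIJReallocateAIJ() when full) and later entries are shifted up.
*/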
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some variables required in the macros */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscTruth ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew = a->nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
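            /* colmap holds (local off-diagonal column + 1) for each global
               column currently present in B, so col is negative here exactly
               when the column has no slot in B yet */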
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ*)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
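/*
   The row indices passed to MatZeroRows() may refer to rows owned by other
   processes. The code below therefore first routes every index to its owner:
   message counts are agreed upon with PetscMaxSum(), the indices travel via
   one MPI_Isend() per destination matched by MPI_Irecv()/MPI_Waitany(), and
   only then does each process zero its (now purely local) list of rows.
*/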
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap->range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);
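  /* lrows[0..slen-1] now holds every locally owned row to be zeroed, already
     shifted to local numbering */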
  /* actually zap the local rows */
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

     Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  }
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
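/*
   For y = A^T x the two blocks are handled separately: lvec = B^T x is formed
   first so that its scatter-add back into y can overlap with the local
   product, i.e. y = A_diag^T x + scatter_reverse_add(B^T x).
*/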
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until this VecScatterEnd() */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)Amat->data, *Bij;
  Mat            Adia = Aij->A,Bdia,Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*)Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
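/*
   File layout produced below: a header [MAT_FILE_COOKIE, M, N, global nz],
   then all row lengths, then all column indices (global numbering, row by
   row, each row sorted), then the numerical values in the same order.
   Process 0 writes its own data and then receives and writes each other
   process's piece in rank order.
*/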
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the largest process needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);
  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt**)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    if (mat->rmap->N > 1024) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead");

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1;

  PetscFunctionBegin;
  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
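/*
   All three SOR_LOCAL_* branches above follow the same block Jacobi outer
   iteration: ghost values of xx are gathered into lvec, the right-hand side
   is corrected to bb1 = bb - B*x, and one on-process sweep of the requested
   type is applied to the diagonal block A.
*/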
#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       first,local_size,nrows;
  const PetscInt *rows;
  int            ntids;
  IS             crowp,growp,irowp,lrowp,lcolp,icolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
  }
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
  ierr = ISSetPermutation(growp);CHKERRQ(ierr);
  if (pcomm!=comm) {
    ierr = ISDestroy(crowp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  /* get the local target indices */
  ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISDestroy(irowp);CHKERRQ(ierr);
  /* the column permutation is so much easier;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&ntids);CHKERRQ(ierr);
  if (ntids==1) {
    lcolp = colp;
  } else {
    ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  if (ntids>1) {
    ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISDestroy(lcolp);CHKERRQ(ierr);
  }
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  /* clean up */
  ierr = ISDestroy(lrowp);CHKERRQ(ierr);
  ierr = ISDestroy(icolp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
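/*
   A minimal sketch of the MatGetRow()/MatRestoreRow() pattern this
   implementation supports (only locally owned rows may be requested):

     PetscInt          rstart,rend,row,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ...
       ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/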
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
sqrt(*norm); 1533 } else if (type == NORM_1) { /* max column norm */ 1534 PetscReal *tmp,*tmp2; 1535 PetscInt *jj,*garray = aij->garray; 1536 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1537 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1538 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1539 *norm = 0.0; 1540 v = amat->a; jj = amat->j; 1541 for (j=0; j<amat->nz; j++) { 1542 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1543 } 1544 v = bmat->a; jj = bmat->j; 1545 for (j=0; j<bmat->nz; j++) { 1546 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1547 } 1548 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1549 for (j=0; j<mat->cmap->N; j++) { 1550 if (tmp2[j] > *norm) *norm = tmp2[j]; 1551 } 1552 ierr = PetscFree(tmp);CHKERRQ(ierr); 1553 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1554 } else if (type == NORM_INFINITY) { /* max row norm */ 1555 PetscReal ntemp = 0.0; 1556 for (j=0; j<aij->A->rmap->n; j++) { 1557 v = amat->a + amat->i[j]; 1558 sum = 0.0; 1559 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1560 sum += PetscAbsScalar(*v); v++; 1561 } 1562 v = bmat->a + bmat->i[j]; 1563 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1564 sum += PetscAbsScalar(*v); v++; 1565 } 1566 if (sum > ntemp) ntemp = sum; 1567 } 1568 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1569 } else { 1570 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1571 } 1572 } 1573 PetscFunctionReturn(0); 1574 } 1575 1576 #undef __FUNCT__ 1577 #define __FUNCT__ "MatTranspose_MPIAIJ" 1578 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1579 { 1580 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1581 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1582 PetscErrorCode ierr; 1583 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1584 PetscInt cstart=A->cmap->rstart,ncol; 1585 Mat B; 1586 MatScalar *array; 1587 1588 PetscFunctionBegin; 1589 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1590 1591 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1592 ai = Aloc->i; aj = Aloc->j; 1593 bi = Bloc->i; bj = Bloc->j; 1594 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1595 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1596 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1597 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1598 for (i=0; i<ai[ma]; i++){ 1599 d_nnz[aj[i]] ++; 1600 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1601 } 1602 1603 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1604 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1605 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1606 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1607 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1608 } else { 1609 B = *matout; 1610 } 1611 1612 /* copy over the A part */ 1613 array = Aloc->a; 1614 row = A->rmap->rstart; 1615 for (i=0; i<ma; i++) { 1616 ncol = ai[i+1]-ai[i]; 1617 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1618 row++; array += ncol; aj += ncol; 1619 } 1620 aj = Aloc->j; 1621 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1622 1623 /* copy over the B part */ 1624 ierr = 
PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1625 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1626 array = Bloc->a; 1627 row = A->rmap->rstart; 1628 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1629 cols_tmp = cols; 1630 for (i=0; i<mb; i++) { 1631 ncol = bi[i+1]-bi[i]; 1632 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1633 row++; array += ncol; cols_tmp += ncol; 1634 } 1635 ierr = PetscFree(cols);CHKERRQ(ierr); 1636 1637 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1638 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1639 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1640 *matout = B; 1641 } else { 1642 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1643 } 1644 PetscFunctionReturn(0); 1645 } 1646 1647 #undef __FUNCT__ 1648 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1649 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1650 { 1651 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1652 Mat a = aij->A,b = aij->B; 1653 PetscErrorCode ierr; 1654 PetscInt s1,s2,s3; 1655 1656 PetscFunctionBegin; 1657 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1658 if (rr) { 1659 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1660 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1661 /* Overlap communication with computation. */ 1662 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1663 } 1664 if (ll) { 1665 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1666 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1667 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1668 } 1669 /* scale the diagonal block */ 1670 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1671 1672 if (rr) { 1673 /* Do a scatter end and then right scale the off-diagonal block */ 1674 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1675 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1676 } 1677 1678 PetscFunctionReturn(0); 1679 } 1680 1681 #undef __FUNCT__ 1682 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1683 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1684 { 1685 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1686 PetscErrorCode ierr; 1687 1688 PetscFunctionBegin; 1689 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1690 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1691 PetscFunctionReturn(0); 1692 } 1693 #undef __FUNCT__ 1694 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1695 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1696 { 1697 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1698 PetscErrorCode ierr; 1699 1700 PetscFunctionBegin; 1701 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1702 PetscFunctionReturn(0); 1703 } 1704 1705 #undef __FUNCT__ 1706 #define __FUNCT__ "MatEqual_MPIAIJ" 1707 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1708 { 1709 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1710 Mat a,b,c,d; 1711 PetscTruth flg; 1712 PetscErrorCode ierr; 1713 1714 PetscFunctionBegin; 1715 a = matA->A; b = matA->B; 1716 c = matB->A; d = matB->B; 1717 1718 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1719 if (flg) { 1720 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1721 } 1722 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1723 PetscFunctionReturn(0); 1724 } 1725 1726 #undef __FUNCT__ 1727 #define __FUNCT__ "MatCopy_MPIAIJ" 1728 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1729 
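/* Copies A into B. When the nonzero patterns agree and both matrices use this same copy implementation, the diagonal block (a->A) and the off-diagonal block (a->B) are copied directly; otherwise the generic MatCopy_Basic() path below is taken. */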
{ 1730 PetscErrorCode ierr; 1731 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1732 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1733 1734 PetscFunctionBegin; 1735 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1736 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1737 /* because of the column compression in the off-processor part of the matrix a->B, 1738 the number of columns in a->B and b->B may be different, hence we cannot call 1739 the MatCopy() directly on the two parts. If need be, we can provide a more 1740 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1741 then copying the submatrices */ 1742 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1743 } else { 1744 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1745 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1746 } 1747 PetscFunctionReturn(0); 1748 } 1749 1750 #undef __FUNCT__ 1751 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1752 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1753 { 1754 PetscErrorCode ierr; 1755 1756 PetscFunctionBegin; 1757 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1758 PetscFunctionReturn(0); 1759 } 1760 1761 #include "petscblaslapack.h" 1762 #undef __FUNCT__ 1763 #define __FUNCT__ "MatAXPY_MPIAIJ" 1764 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1765 { 1766 PetscErrorCode ierr; 1767 PetscInt i; 1768 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1769 PetscBLASInt bnz,one=1; 1770 Mat_SeqAIJ *x,*y; 1771 1772 PetscFunctionBegin; 1773 if (str == SAME_NONZERO_PATTERN) { 1774 PetscScalar alpha = a; 1775 x = (Mat_SeqAIJ *)xx->A->data; 1776 y = (Mat_SeqAIJ *)yy->A->data; 1777 bnz = PetscBLASIntCast(x->nz); 1778 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1779 x = (Mat_SeqAIJ *)xx->B->data; 1780 y = (Mat_SeqAIJ *)yy->B->data; 1781 bnz = PetscBLASIntCast(x->nz); 1782 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1783 } else if (str == SUBSET_NONZERO_PATTERN) { 1784 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1785 1786 x = (Mat_SeqAIJ *)xx->B->data; 1787 y = (Mat_SeqAIJ *)yy->B->data; 1788 if (y->xtoy && y->XtoY != xx->B) { 1789 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1790 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1791 } 1792 if (!y->xtoy) { /* get xtoy */ 1793 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1794 y->XtoY = xx->B; 1795 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1796 } 1797 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1798 } else { 1799 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1800 } 1801 PetscFunctionReturn(0); 1802 } 1803 1804 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1805 1806 #undef __FUNCT__ 1807 #define __FUNCT__ "MatConjugate_MPIAIJ" 1808 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1809 { 1810 #if defined(PETSC_USE_COMPLEX) 1811 PetscErrorCode ierr; 1812 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1813 1814 PetscFunctionBegin; 1815 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1816 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1817 #else 1818 PetscFunctionBegin; 1819 #endif 1820 PetscFunctionReturn(0); 1821 } 1822 1823 #undef __FUNCT__ 1824 #define __FUNCT__ "MatRealPart_MPIAIJ" 1825 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1826 { 1827 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1828 PetscErrorCode ierr; 1829 1830 PetscFunctionBegin; 1831 ierr = 
MatRealPart(a->A);CHKERRQ(ierr); 1832 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1833 PetscFunctionReturn(0); 1834 } 1835 1836 #undef __FUNCT__ 1837 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1838 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1839 { 1840 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1841 PetscErrorCode ierr; 1842 1843 PetscFunctionBegin; 1844 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1845 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1846 PetscFunctionReturn(0); 1847 } 1848 1849 #ifdef PETSC_HAVE_PBGL 1850 1851 #include <boost/parallel/mpi/bsp_process_group.hpp> 1852 #include <boost/graph/distributed/ilu_default_graph.hpp> 1853 #include <boost/graph/distributed/ilu_0_block.hpp> 1854 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1855 #include <boost/graph/distributed/petsc/interface.hpp> 1856 #include <boost/multi_array.hpp> 1857 #include <boost/parallel/distributed_property_map.hpp> 1858 1859 #undef __FUNCT__ 1860 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1861 /* 1862 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1863 */ 1864 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1865 { 1866 namespace petsc = boost::distributed::petsc; 1867 1868 namespace graph_dist = boost::graph::distributed; 1869 using boost::graph::distributed::ilu_default::process_group_type; 1870 using boost::graph::ilu_permuted; 1871 1872 PetscTruth row_identity, col_identity; 1873 PetscContainer c; 1874 PetscInt m, n, M, N; 1875 PetscErrorCode ierr; 1876 1877 PetscFunctionBegin; 1878 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ILU"); 1879 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1880 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1881 if (!row_identity || !col_identity) { 1882 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1883 } 1884 1885 process_group_type pg; 1886 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1887 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1888 lgraph_type& level_graph = *lgraph_p; 1889 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1890 1891 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1892 ilu_permuted(level_graph); 1893 1894 /* put together the new matrix */ 1895 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 1896 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1897 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1898 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 1899 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 1900 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1901 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1902 1903 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr); 1904 ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr); 1905 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr); 1906 PetscFunctionReturn(0); 1907 } 1908 1909 #undef __FUNCT__ 1910 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1911 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 1912 { 1913 PetscFunctionBegin; 1914 PetscFunctionReturn(0); 1915 } 1916 1917 #undef __FUNCT__ 1918 #define __FUNCT__ "MatSolve_MPIAIJ" 1919 /* 1920 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1921 */ 1922 PetscErrorCode
MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1923 { 1924 namespace graph_dist = boost::graph::distributed; 1925 1926 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1927 lgraph_type* lgraph_p; 1928 PetscContainer c; 1929 PetscErrorCode ierr; 1930 1931 PetscFunctionBegin; 1932 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1933 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1934 ierr = VecCopy(b, x); CHKERRQ(ierr); 1935 1936 PetscScalar* array_x; 1937 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1938 PetscInt sx; 1939 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 1940 1941 PetscScalar* array_b; 1942 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 1943 PetscInt sb; 1944 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 1945 1946 lgraph_type& level_graph = *lgraph_p; 1947 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1948 1949 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 1950 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 1951 ref_x(array_x, boost::extents[num_vertices(graph)]); 1952 1953 typedef boost::iterator_property_map<array_ref_type::iterator, 1954 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 1955 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 1956 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 1957 1958 ilu_set_solve(*lgraph_p, vector_b, vector_x); 1959 ierr = VecRestoreArray(b, &array_b);CHKERRQ(ierr); ierr = VecRestoreArray(x, &array_x);CHKERRQ(ierr); 1960 PetscFunctionReturn(0); 1961 } 1962 #endif 1963 1964 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 1965 PetscInt nzlocal,nsends,nrecvs; 1966 PetscMPIInt *send_rank; 1967 PetscInt *sbuf_nz,*sbuf_j,**rbuf_j; 1968 PetscScalar *sbuf_a,**rbuf_a; 1969 PetscErrorCode (*MatDestroy)(Mat); 1970 } Mat_Redundant; 1971 1972 #undef __FUNCT__ 1973 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 1974 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 1975 { 1976 PetscErrorCode ierr; 1977 Mat_Redundant *redund=(Mat_Redundant*)ptr; 1978 PetscInt i; 1979 1980 PetscFunctionBegin; 1981 ierr = PetscFree(redund->send_rank);CHKERRQ(ierr); 1982 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 1983 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 1984 for (i=0; i<redund->nrecvs; i++){ 1985 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 1986 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 1987 } 1988 ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 1989 ierr = PetscFree(redund);CHKERRQ(ierr); 1990 PetscFunctionReturn(0); 1991 } 1992 1993 #undef __FUNCT__ 1994 #define __FUNCT__ "MatDestroy_MatRedundant" 1995 PetscErrorCode MatDestroy_MatRedundant(Mat A) 1996 { 1997 PetscErrorCode ierr; 1998 PetscContainer container; 1999 Mat_Redundant *redund=PETSC_NULL; 2000 2001 PetscFunctionBegin; 2002 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2003 if (container) { 2004 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2005 } else { 2006 SETERRQ(PETSC_ERR_PLIB,"Container does not exist"); 2007 } 2008 A->ops->destroy = redund->MatDestroy; 2009 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2010 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2011 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 2012 PetscFunctionReturn(0); 2013 } 2014 2015 #undef __FUNCT__ 2016 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2017 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt
nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2018 { 2019 PetscMPIInt rank,size; 2020 MPI_Comm comm=((PetscObject)mat)->comm; 2021 PetscErrorCode ierr; 2022 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2023 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2024 PetscInt *rowrange=mat->rmap->range; 2025 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2026 Mat A=aij->A,B=aij->B,C=*matredundant; 2027 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2028 PetscScalar *sbuf_a; 2029 PetscInt nzlocal=a->nz+b->nz; 2030 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2031 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2032 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2033 MatScalar *aworkA,*aworkB; 2034 PetscScalar *vals; 2035 PetscMPIInt tag1,tag2,tag3,imdex; 2036 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2037 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2038 MPI_Status recv_status,*send_status; 2039 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2040 PetscInt **rbuf_j=PETSC_NULL; 2041 PetscScalar **rbuf_a=PETSC_NULL; 2042 Mat_Redundant *redund=PETSC_NULL; 2043 PetscContainer container; 2044 2045 PetscFunctionBegin; 2046 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2047 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2048 2049 if (reuse == MAT_REUSE_MATRIX) { 2050 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2051 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2052 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2053 if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2054 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2055 if (container) { 2056 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2057 } else { 2058 SETERRQ(PETSC_ERR_PLIB,"Container does not exist"); 2059 } 2060 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix.
Wrong nzlocal"); 2061 2062 nsends = redund->nsends; 2063 nrecvs = redund->nrecvs; 2064 send_rank = redund->send_rank; recv_rank = send_rank + size; 2065 sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends; 2066 sbuf_j = redund->sbuf_j; 2067 sbuf_a = redund->sbuf_a; 2068 rbuf_j = redund->rbuf_j; 2069 rbuf_a = redund->rbuf_a; 2070 } 2071 2072 if (reuse == MAT_INITIAL_MATRIX){ 2073 PetscMPIInt subrank,subsize; 2074 PetscInt nleftover,np_subcomm; 2075 /* get the destination processors' id send_rank, nsends and nrecvs */ 2076 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2077 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2078 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 2079 recv_rank = send_rank + size; 2080 np_subcomm = size/nsubcomm; 2081 nleftover = size - nsubcomm*np_subcomm; 2082 nsends = 0; nrecvs = 0; 2083 for (i=0; i<size; i++){ /* i=rank*/ 2084 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2085 send_rank[nsends] = i; nsends++; 2086 recv_rank[nrecvs++] = i; 2087 } 2088 } 2089 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2090 i = size-nleftover-1; 2091 j = 0; 2092 while (j < nsubcomm - nleftover){ 2093 send_rank[nsends++] = i; 2094 i--; j++; 2095 } 2096 } 2097 2098 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2099 for (i=0; i<nleftover; i++){ 2100 recv_rank[nrecvs++] = size-nleftover+i; 2101 } 2102 } 2103 2104 /* allocate sbuf_j, sbuf_a */ 2105 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2106 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2107 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2108 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2109 2110 /* copy mat's local entries into the buffers */ 2111 if (reuse == MAT_INITIAL_MATRIX){ 2112 rownz_max = 0; 2113 rptr = sbuf_j; 2114 cols = sbuf_j + rend-rstart + 1; 2115 vals = sbuf_a; 2116 rptr[0] = 0; 2117 for (i=0; i<rend-rstart; i++){ 2118 row = i + rstart; 2119 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2120 ncols = nzA + nzB; 2121 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2122 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2123 /* load the column indices for this row into cols */ 2124 lwrite = 0; 2125 for (l=0; l<nzB; l++) { 2126 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2127 vals[lwrite] = aworkB[l]; 2128 cols[lwrite++] = ctmp; 2129 } 2130 } 2131 for (l=0; l<nzA; l++){ 2132 vals[lwrite] = aworkA[l]; 2133 cols[lwrite++] = cstart + cworkA[l]; 2134 } 2135 for (l=0; l<nzB; l++) { 2136 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2137 vals[lwrite] = aworkB[l]; 2138 cols[lwrite++] = ctmp; 2139 } 2140 } 2141 vals += ncols; 2142 cols += ncols; 2143 rptr[i+1] = rptr[i] + ncols; 2144 if (rownz_max < ncols) rownz_max = ncols; 2145 } 2146 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2147 } else { /* only copy matrix values into sbuf_a */ 2148 rptr = sbuf_j; 2149 vals = sbuf_a; 2150 rptr[0] = 0; 2151 for (i=0; i<rend-rstart; i++){ 2152 row = i + rstart; 2153 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2154 ncols = nzA + nzB; 2155 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2156 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2157 lwrite = 0; 2158 for (l=0; l<nzB; l++) { 2159 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2160 } 2161 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2162 for (l=0; l<nzB; l++) { 
2163 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2164 } 2165 vals += ncols; 2166 rptr[i+1] = rptr[i] + ncols; 2167 } 2168 } /* end of if (reuse == MAT_INITIAL_MATRIX) */ 2169 2170 /* send nzlocal to others, and receive others' nzlocal */ 2171 /*--------------------------------------------------*/ 2172 if (reuse == MAT_INITIAL_MATRIX){ 2173 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2174 s_waits2 = s_waits3 + nsends; 2175 s_waits1 = s_waits2 + nsends; 2176 r_waits1 = s_waits1 + nsends; 2177 r_waits2 = r_waits1 + nrecvs; 2178 r_waits3 = r_waits2 + nrecvs; 2179 } else { 2180 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2181 r_waits3 = s_waits3 + nsends; 2182 } 2183 2184 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2185 if (reuse == MAT_INITIAL_MATRIX){ 2186 /* get new tags to keep the communication clean */ 2187 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2188 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2189 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2190 rbuf_nz = sbuf_nz + nsends; 2191 2192 /* post receives of others' nzlocal */ 2193 for (i=0; i<nrecvs; i++){ 2194 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2195 } 2196 /* send nzlocal to others */ 2197 for (i=0; i<nsends; i++){ 2198 sbuf_nz[i] = nzlocal; 2199 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2200 } 2201 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2202 count = nrecvs; 2203 while (count) { 2204 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2205 recv_rank[imdex] = recv_status.MPI_SOURCE; 2206 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2207 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2208 2209 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2210 rbuf_nz[imdex] += i + 2; 2211 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2212 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2213 count--; 2214 } 2215 /* wait on sends of nzlocal */ 2216 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2217 /* send mat->i,j to others, and receive from others */ 2218 /*------------------------------------------------*/ 2219 for (i=0; i<nsends; i++){ 2220 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2221 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2222 } 2223 /* wait on receives of mat->i,j */ 2224 /*------------------------------*/ 2225 count = nrecvs; 2226 while (count) { 2227 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2228 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2229 count--; 2230 } 2231 /* wait on sends of mat->i,j */ 2232 /*---------------------------*/ 2233 if (nsends) { 2234 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2235 } 2236 } /* end of if (reuse == MAT_INITIAL_MATRIX) */ 2237 2238 /* post receives, send and receive mat->a */ 2239 /*----------------------------------------*/
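/* Nonblocking exchange of the numerical values: all receives are posted before the matching sends, and MPI_Waitany() below harvests the receives in whatever order they complete. */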
2240 for (imdex=0; imdex<nrecvs; imdex++) { 2241 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2242 } 2243 for (i=0; i<nsends; i++){ 2244 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2245 } 2246 count = nrecvs; 2247 while (count) { 2248 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2249 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2250 count--; 2251 } 2252 if (nsends) { 2253 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2254 } 2255 2256 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2257 2258 /* create redundant matrix */ 2259 /*-------------------------*/ 2260 if (reuse == MAT_INITIAL_MATRIX){ 2261 /* compute rownz_max for preallocation */ 2262 for (imdex=0; imdex<nrecvs; imdex++){ 2263 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2264 rptr = rbuf_j[imdex]; 2265 for (i=0; i<j; i++){ 2266 ncols = rptr[i+1] - rptr[i]; 2267 if (rownz_max < ncols) rownz_max = ncols; 2268 } 2269 } 2270 2271 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2272 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2273 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2274 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2275 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2276 } else { 2277 C = *matredundant; 2278 } 2279 2280 /* insert local matrix entries */ 2281 rptr = sbuf_j; 2282 cols = sbuf_j + rend-rstart + 1; 2283 vals = sbuf_a; 2284 for (i=0; i<rend-rstart; i++){ 2285 row = i + rstart; 2286 ncols = rptr[i+1] - rptr[i]; 2287 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2288 vals += ncols; 2289 cols += ncols; 2290 } 2291 /* insert received matrix entries */ 2292 for (imdex=0; imdex<nrecvs; imdex++){ 2293 rstart = rowrange[recv_rank[imdex]]; 2294 rend = rowrange[recv_rank[imdex]+1]; 2295 rptr = rbuf_j[imdex]; 2296 cols = rbuf_j[imdex] + rend-rstart + 1; 2297 vals = rbuf_a[imdex]; 2298 for (i=0; i<rend-rstart; i++){ 2299 row = i + rstart; 2300 ncols = rptr[i+1] - rptr[i]; 2301 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2302 vals += ncols; 2303 cols += ncols; 2304 } 2305 } 2306 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2307 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2308 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2309 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2310 if (reuse == MAT_INITIAL_MATRIX){ 2311 PetscContainer container; 2312 *matredundant = C; 2313 /* create a supporting struct and attach it to C for reuse */ 2314 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2315 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2316 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2317 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2318 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2319 2320 redund->nzlocal = nzlocal; 2321 redund->nsends = nsends; 2322 redund->nrecvs = nrecvs; 2323 redund->send_rank = send_rank; 2324 redund->sbuf_nz = sbuf_nz; 2325 redund->sbuf_j = sbuf_j; 2326 redund->sbuf_a = sbuf_a; 2327 redund->rbuf_j = 
rbuf_j; 2328 redund->rbuf_a = rbuf_a; 2329 2330 redund->MatDestroy = C->ops->destroy; 2331 C->ops->destroy = MatDestroy_MatRedundant; 2332 } 2333 PetscFunctionReturn(0); 2334 } 2335 2336 #undef __FUNCT__ 2337 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2338 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2339 { 2340 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2341 PetscErrorCode ierr; 2342 PetscInt i,*idxb = 0; 2343 PetscScalar *va,*vb; 2344 Vec vtmp; 2345 2346 PetscFunctionBegin; 2347 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2348 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2349 if (idx) { 2350 for (i=0; i<A->rmap->n; i++) { 2351 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2352 } 2353 } 2354 2355 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2356 if (idx) { 2357 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2358 } 2359 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2360 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2361 2362 for (i=0; i<A->rmap->n; i++){ 2363 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2364 va[i] = vb[i]; 2365 if (idx) idx[i] = a->garray[idxb[i]]; 2366 } 2367 } 2368 2369 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2370 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2371 if (idxb) { 2372 ierr = PetscFree(idxb);CHKERRQ(ierr); 2373 } 2374 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2375 PetscFunctionReturn(0); 2376 } 2377 2378 #undef __FUNCT__ 2379 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2380 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2381 { 2382 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2383 PetscErrorCode ierr; 2384 PetscInt i,*idxb = 0; 2385 PetscScalar *va,*vb; 2386 Vec vtmp; 2387 2388 PetscFunctionBegin; 2389 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2390 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2391 if (idx) { 2392 for (i=0; i<A->rmap->n; i++) { 2393 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2394 } 2395 } 2396 2397 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2398 if (idx) { 2399 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2400 } 2401 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2402 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2403 2404 for (i=0; i<A->rmap->n; i++){ 2405 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2406 va[i] = vb[i]; 2407 if (idx) idx[i] = a->garray[idxb[i]]; 2408 } 2409 } 2410 2411 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2412 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2413 if (idxb) { 2414 ierr = PetscFree(idxb);CHKERRQ(ierr); 2415 } 2416 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2417 PetscFunctionReturn(0); 2418 } 2419 2420 #undef __FUNCT__ 2421 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2422 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2423 { 2424 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2425 PetscInt n = A->rmap->n; 2426 PetscInt cstart = A->cmap->rstart; 2427 PetscInt *cmap = mat->garray; 2428 PetscInt *diagIdx, *offdiagIdx; 2429 Vec diagV, offdiagV; 2430 PetscScalar *a, *diagA, *offdiagA; 2431 PetscInt r; 2432 PetscErrorCode ierr; 2433 2434 PetscFunctionBegin; 2435 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2436 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2437 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2438 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2439 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2440
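/* Merge the two partial results: for each local row keep whichever of the diagonal-block and off-diagonal-block minima has the smaller magnitude, translating off-diagonal indices to global column numbers through garray. */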
ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2441 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2442 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2443 for(r = 0; r < n; ++r) { 2444 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2445 a[r] = diagA[r]; 2446 idx[r] = cstart + diagIdx[r]; 2447 } else { 2448 a[r] = offdiagA[r]; 2449 idx[r] = cmap[offdiagIdx[r]]; 2450 } 2451 } 2452 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2453 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2454 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2455 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2456 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2457 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2458 PetscFunctionReturn(0); 2459 } 2460 2461 #undef __FUNCT__ 2462 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2463 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2464 { 2465 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2466 PetscInt n = A->rmap->n; 2467 PetscInt cstart = A->cmap->rstart; 2468 PetscInt *cmap = mat->garray; 2469 PetscInt *diagIdx, *offdiagIdx; 2470 Vec diagV, offdiagV; 2471 PetscScalar *a, *diagA, *offdiagA; 2472 PetscInt r; 2473 PetscErrorCode ierr; 2474 2475 PetscFunctionBegin; 2476 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2477 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2478 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2479 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2480 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2481 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2482 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2483 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2484 for(r = 0; r < n; ++r) { 2485 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2486 a[r] = diagA[r]; 2487 idx[r] = cstart + diagIdx[r]; 2488 } else { 2489 a[r] = offdiagA[r]; 2490 idx[r] = cmap[offdiagIdx[r]]; 2491 } 2492 } 2493 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2494 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2495 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2496 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2497 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2498 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2499 PetscFunctionReturn(0); 2500 } 2501 2502 #undef __FUNCT__ 2503 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2504 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[]) 2505 { 2506 PetscErrorCode ierr; 2507 2508 PetscFunctionBegin; 2509 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 2510 PetscFunctionReturn(0); 2511 } 2512 2513 /* -------------------------------------------------------------------*/ 2514 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2515 MatGetRow_MPIAIJ, 2516 MatRestoreRow_MPIAIJ, 2517 MatMult_MPIAIJ, 2518 /* 4*/ MatMultAdd_MPIAIJ, 2519 MatMultTranspose_MPIAIJ, 2520 MatMultTransposeAdd_MPIAIJ, 2521 #ifdef PETSC_HAVE_PBGL 2522 MatSolve_MPIAIJ, 2523 #else 2524 0, 2525 #endif 2526 0, 2527 0, 2528 /*10*/ 0, 2529 0, 2530 0, 2531 MatRelax_MPIAIJ, 2532 MatTranspose_MPIAIJ, 2533 /*15*/ MatGetInfo_MPIAIJ, 2534 MatEqual_MPIAIJ, 2535 MatGetDiagonal_MPIAIJ, 2536 MatDiagonalScale_MPIAIJ, 2537 MatNorm_MPIAIJ, 2538 /*20*/ MatAssemblyBegin_MPIAIJ, 2539 MatAssemblyEnd_MPIAIJ, 2540 0, 2541 MatSetOption_MPIAIJ, 2542 MatZeroEntries_MPIAIJ, 2543 /*25*/ MatZeroRows_MPIAIJ, 2544 0, 2545 #ifdef PETSC_HAVE_PBGL
2546 0, 2547 #else 2548 0, 2549 #endif 2550 0, 2551 0, 2552 /*30*/ MatSetUpPreallocation_MPIAIJ, 2553 #ifdef PETSC_HAVE_PBGL 2554 0, 2555 #else 2556 0, 2557 #endif 2558 0, 2559 0, 2560 0, 2561 /*35*/ MatDuplicate_MPIAIJ, 2562 0, 2563 0, 2564 0, 2565 0, 2566 /*40*/ MatAXPY_MPIAIJ, 2567 MatGetSubMatrices_MPIAIJ, 2568 MatIncreaseOverlap_MPIAIJ, 2569 MatGetValues_MPIAIJ, 2570 MatCopy_MPIAIJ, 2571 /*45*/ MatGetRowMax_MPIAIJ, 2572 MatScale_MPIAIJ, 2573 0, 2574 0, 2575 0, 2576 /*50*/ MatSetBlockSize_MPIAIJ, 2577 0, 2578 0, 2579 0, 2580 0, 2581 /*55*/ MatFDColoringCreate_MPIAIJ, 2582 0, 2583 MatSetUnfactored_MPIAIJ, 2584 MatPermute_MPIAIJ, 2585 0, 2586 /*60*/ MatGetSubMatrix_MPIAIJ, 2587 MatDestroy_MPIAIJ, 2588 MatView_MPIAIJ, 2589 0, 2590 0, 2591 /*65*/ 0, 2592 0, 2593 0, 2594 0, 2595 0, 2596 /*70*/ MatGetRowMaxAbs_MPIAIJ, 2597 MatGetRowMinAbs_MPIAIJ, 2598 0, 2599 MatSetColoring_MPIAIJ, 2600 #if defined(PETSC_HAVE_ADIC) 2601 MatSetValuesAdic_MPIAIJ, 2602 #else 2603 0, 2604 #endif 2605 MatSetValuesAdifor_MPIAIJ, 2606 /*75*/ 0, 2607 0, 2608 0, 2609 0, 2610 0, 2611 /*80*/ 0, 2612 0, 2613 0, 2614 /*84*/ MatLoad_MPIAIJ, 2615 0, 2616 0, 2617 0, 2618 0, 2619 0, 2620 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 2621 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2622 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2623 MatPtAP_Basic, 2624 MatPtAPSymbolic_MPIAIJ, 2625 /*95*/ MatPtAPNumeric_MPIAIJ, 2626 0, 2627 0, 2628 0, 2629 0, 2630 /*100*/0, 2631 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2632 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2633 MatConjugate_MPIAIJ, 2634 0, 2635 /*105*/MatSetValuesRow_MPIAIJ, 2636 MatRealPart_MPIAIJ, 2637 MatImaginaryPart_MPIAIJ, 2638 0, 2639 0, 2640 /*110*/0, 2641 MatGetRedundantMatrix_MPIAIJ, 2642 MatGetRowMin_MPIAIJ, 2643 0, 2644 0, 2645 /*115*/MatGetSeqNonzerostructure_MPIAIJ}; 2646 2647 /* ----------------------------------------------------------------------------------------*/ 2648 2649 EXTERN_C_BEGIN 2650 #undef __FUNCT__ 2651 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2652 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2653 { 2654 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2655 PetscErrorCode ierr; 2656 2657 PetscFunctionBegin; 2658 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2659 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2660 PetscFunctionReturn(0); 2661 } 2662 EXTERN_C_END 2663 2664 EXTERN_C_BEGIN 2665 #undef __FUNCT__ 2666 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2667 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2668 { 2669 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2670 PetscErrorCode ierr; 2671 2672 PetscFunctionBegin; 2673 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2674 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2675 PetscFunctionReturn(0); 2676 } 2677 EXTERN_C_END 2678 2679 #include "petscpc.h" 2680 EXTERN_C_BEGIN 2681 #undef __FUNCT__ 2682 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2683 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2684 { 2685 Mat_MPIAIJ *b; 2686 PetscErrorCode ierr; 2687 PetscInt i; 2688 2689 PetscFunctionBegin; 2690 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2691 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2692 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2693 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2694 2695 ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr); 2696 ierr = 
PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr); 2697 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 2698 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 2699 if (d_nnz) { 2700 for (i=0; i<B->rmap->n; i++) { 2701 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2702 } 2703 } 2704 if (o_nnz) { 2705 for (i=0; i<B->rmap->n; i++) { 2706 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2707 } 2708 } 2709 b = (Mat_MPIAIJ*)B->data; 2710 2711 if (!B->preallocated) { 2712 /* Explicitly create 2 MATSEQAIJ matrices. */ 2713 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2714 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2715 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2716 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2717 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2718 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2719 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2720 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2721 } 2722 2723 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2724 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2725 B->preallocated = PETSC_TRUE; 2726 PetscFunctionReturn(0); 2727 } 2728 EXTERN_C_END 2729 2730 #undef __FUNCT__ 2731 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2732 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2733 { 2734 Mat mat; 2735 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2736 PetscErrorCode ierr; 2737 2738 PetscFunctionBegin; 2739 *newmat = 0; 2740 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2741 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2742 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2743 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2744 a = (Mat_MPIAIJ*)mat->data; 2745 2746 mat->factor = matin->factor; 2747 mat->rmap->bs = matin->rmap->bs; 2748 mat->assembled = PETSC_TRUE; 2749 mat->insertmode = NOT_SET_VALUES; 2750 mat->preallocated = PETSC_TRUE; 2751 2752 a->size = oldmat->size; 2753 a->rank = oldmat->rank; 2754 a->donotstash = oldmat->donotstash; 2755 a->roworiented = oldmat->roworiented; 2756 a->rowindices = 0; 2757 a->rowvalues = 0; 2758 a->getrowactive = PETSC_FALSE; 2759 2760 ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr); 2761 ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr); 2762 2763 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr); 2764 if (oldmat->colmap) { 2765 #if defined (PETSC_USE_CTABLE) 2766 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2767 #else 2768 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2769 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2770 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2771 #endif 2772 } else a->colmap = 0; 2773 if (oldmat->garray) { 2774 PetscInt len; 2775 len = oldmat->B->cmap->n; 2776 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2777 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2778 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 
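/* garray maps the local (compressed) column numbering of the off-diagonal block to global column numbers; the duplicate gets its own copy so no storage is shared with the original */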
} 2779 } else a->garray = 0; 2780 2781 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2782 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2783 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2784 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2785 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2786 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2787 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2788 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2789 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2790 *newmat = mat; 2791 PetscFunctionReturn(0); 2792 } 2793 2794 #include "petscsys.h" 2795 2796 #undef __FUNCT__ 2797 #define __FUNCT__ "MatLoad_MPIAIJ" 2798 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 2799 { 2800 Mat A; 2801 PetscScalar *vals,*svals; 2802 MPI_Comm comm = ((PetscObject)viewer)->comm; 2803 MPI_Status status; 2804 PetscErrorCode ierr; 2805 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz; 2806 PetscInt i,nz,j,rstart,rend,mmax,maxnz; 2807 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2808 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2809 PetscInt cend,cstart,n,*rowners; 2810 int fd; 2811 2812 PetscFunctionBegin; 2813 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2814 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2815 if (!rank) { 2816 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2817 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2818 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2819 } 2820 2821 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2822 M = header[1]; N = header[2]; 2823 /* determine ownership of all rows */ 2824 m = M/size + ((M % size) > rank); 2825 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2826 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2827 2828 /* First process needs enough room for process with most rows */ 2829 if (!rank) { 2830 mmax = rowners[1]; 2831 for (i=2; i<size; i++) { 2832 mmax = PetscMax(mmax,rowners[i]); 2833 } 2834 } else mmax = m; 2835 2836 rowners[0] = 0; 2837 for (i=2; i<=size; i++) { 2838 rowners[i] += rowners[i-1]; 2839 } 2840 rstart = rowners[rank]; 2841 rend = rowners[rank+1]; 2842 2843 /* distribute row lengths to all processors */ 2844 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2845 if (!rank) { 2846 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2847 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2848 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2849 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2850 for (j=0; j<m; j++) { 2851 procsnz[0] += ourlens[j]; 2852 } 2853 for (i=1; i<size; i++) { 2854 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2855 /* calculate the number of nonzeros on each processor */ 2856 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2857 procsnz[i] += rowlengths[j]; 2858 } 2859 mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]); 2860 ierr = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2861 } 2862 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2863 } else { 2864 mpicnt = PetscMPIIntCast(m); 2865 ierr =
MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2866 } 2867 2868 if (!rank) { 2869 /* determine max buffer needed and allocate it */ 2870 maxnz = 0; 2871 for (i=0; i<size; i++) { 2872 maxnz = PetscMax(maxnz,procsnz[i]); 2873 } 2874 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2875 2876 /* read in my part of the matrix column indices */ 2877 nz = procsnz[0]; 2878 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2879 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2880 2881 /* read in everyone else's and ship off */ 2882 for (i=1; i<size; i++) { 2883 nz = procsnz[i]; 2884 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2885 mpicnt = PetscMPIIntCast(nz); 2886 ierr = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2887 } 2888 ierr = PetscFree(cols);CHKERRQ(ierr); 2889 } else { 2890 /* determine buffer space needed for message */ 2891 nz = 0; 2892 for (i=0; i<m; i++) { 2893 nz += ourlens[i]; 2894 } 2895 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2896 2897 /* receive message of column indices */ 2898 mpicnt = PetscMPIIntCast(nz); 2899 ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2900 ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr); 2901 if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 2902 else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);} 2903 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 2904 } 2905 2906 /* determine column ownership if matrix is not square */ 2907 if (N != M) { 2908 n = N/size + ((N % size) > rank); 2909 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2910 cstart = cend - n; 2911 } else { 2912 cstart = rstart; 2913 cend = rend; 2914 n = cend - cstart; 2915 } 2916 2917 /* loop over local rows, determining number of off diagonal entries */ 2918 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2919 jj = 0; 2920 for (i=0; i<m; i++) { 2921 for (j=0; j<ourlens[i]; j++) { 2922 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2923 jj++; 2924 } 2925 } 2926 2927 /* create our matrix */ 2928 for (i=0; i<m; i++) { 2929 ourlens[i] -= offlens[i]; 2930 } 2931 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2932 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2933 ierr = MatSetType(A,type);CHKERRQ(ierr); 2934 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2935 2936 for (i=0; i<m; i++) { 2937 ourlens[i] += offlens[i]; 2938 } 2939 2940 if (!rank) { 2941 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2942 2943 /* read in my part of the matrix numerical values */ 2944 nz = procsnz[0]; 2945 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2946 2947 /* insert into matrix */ 2948 jj = rstart; 2949 smycols = mycols; 2950 svals = vals; 2951 for (i=0; i<m; i++) { 2952 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2953 smycols += ourlens[i]; 2954 svals += ourlens[i]; 2955 jj++; 2956 } 2957 2958 /* read in other processors and ship out */ 2959 for (i=1; i<size; i++) { 2960 nz = procsnz[i]; 2961 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2962 mpicnt = PetscMPIIntCast(nz); 2963 ierr =
MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 2964 } 2965 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2966 } else { 2967 /* receive numeric values */ 2968 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2969 2970 /* receive message of values */ 2971 mpicnt = PetscMPIIntCast(nz); 2972 ierr = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 2973 ierr = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr); 2974 if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 2975 else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);} 2976 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 2977 2978 /* insert into matrix */ 2979 jj = rstart; 2980 smycols = mycols; 2981 svals = vals; 2982 for (i=0; i<m; i++) { 2983 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2984 smycols += ourlens[i]; 2985 svals += ourlens[i]; 2986 jj++; 2987 } 2988 } 2989 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2990 ierr = PetscFree(vals);CHKERRQ(ierr); 2991 ierr = PetscFree(mycols);CHKERRQ(ierr); 2992 ierr = PetscFree(rowners);CHKERRQ(ierr); 2993 2994 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2995 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2996 *newmat = A; 2997 PetscFunctionReturn(0); 2998 } 2999 3000 #undef __FUNCT__ 3001 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3002 /* 3003 Not great since it makes two copies of the submatrix, first a SeqAIJ 3004 on each process and then, by concatenating the local matrices, the end result.
3005 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3006 */ 3007 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3008 { 3009 PetscErrorCode ierr; 3010 PetscMPIInt rank,size; 3011 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3012 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3013 Mat *local,M,Mreuse; 3014 MatScalar *vwork,*aa; 3015 MPI_Comm comm = ((PetscObject)mat)->comm; 3016 Mat_SeqAIJ *aij; 3017 3018 3019 PetscFunctionBegin; 3020 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3021 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3022 3023 if (call == MAT_REUSE_MATRIX) { 3024 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3025 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3026 local = &Mreuse; 3027 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3028 } else { 3029 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3030 Mreuse = *local; 3031 ierr = PetscFree(local);CHKERRQ(ierr); 3032 } 3033 3034 /* 3035 m - number of local rows 3036 n - number of columns (same on all processors) 3037 rstart - first row in new global matrix generated 3038 */ 3039 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3040 if (call == MAT_INITIAL_MATRIX) { 3041 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3042 ii = aij->i; 3043 jj = aij->j; 3044 3045 /* 3046 Determine the number of non-zeros in the diagonal and off-diagonal 3047 portions of the matrix in order to do correct preallocation 3048 */ 3049 3050 /* first get start and end of "diagonal" columns */ 3051 if (csize == PETSC_DECIDE) { 3052 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3053 if (mglobal == n) { /* square matrix */ 3054 nlocal = m; 3055 } else { 3056 nlocal = n/size + ((n % size) > rank); 3057 } 3058 } else { 3059 nlocal = csize; 3060 } 3061 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3062 rstart = rend - nlocal; 3063 if (rank == size - 1 && rend != n) { 3064 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3065 } 3066 3067 /* next, compute all the lengths */ 3068 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3069 olens = dlens + m; 3070 for (i=0; i<m; i++) { 3071 jend = ii[i+1] - ii[i]; 3072 olen = 0; 3073 dlen = 0; 3074 for (j=0; j<jend; j++) { 3075 if (*jj < rstart || *jj >= rend) olen++; 3076 else dlen++; 3077 jj++; 3078 } 3079 olens[i] = olen; 3080 dlens[i] = dlen; 3081 } 3082 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3083 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3084 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3085 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3086 ierr = PetscFree(dlens);CHKERRQ(ierr); 3087 } else { 3088 PetscInt ml,nl; 3089 3090 M = *newmat; 3091 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3092 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3093 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3094 /* 3095 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3096 rather than the slower MatSetValues(). 
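With was_assembled set and assembled cleared below, the insertion routine treats M as a previously assembled matrix whose existing nonzero structure is being refilled.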
*/ 3098 M->was_assembled = PETSC_TRUE; 3099 M->assembled = PETSC_FALSE; 3100 } 3101 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3102 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3103 ii = aij->i; 3104 jj = aij->j; 3105 aa = aij->a; 3106 for (i=0; i<m; i++) { 3107 row = rstart + i; 3108 nz = ii[i+1] - ii[i]; 3109 cwork = jj; jj += nz; 3110 vwork = aa; aa += nz; 3111 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3112 } 3113 3114 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3115 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3116 *newmat = M; 3117 3118 /* save submatrix used in processor for next request */ 3119 if (call == MAT_INITIAL_MATRIX) { 3120 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3121 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 3122 } 3123 3124 PetscFunctionReturn(0); 3125 } 3126 3127 EXTERN_C_BEGIN 3128 #undef __FUNCT__ 3129 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3130 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3131 { 3132 PetscInt m,cstart, cend,j,nnz,i,d; 3133 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3134 const PetscInt *JJ; 3135 PetscScalar *values; 3136 PetscErrorCode ierr; 3137 3138 PetscFunctionBegin; 3139 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0; it is %D",Ii[0]); 3140 3141 ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3142 ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3143 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 3144 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 3145 m = B->rmap->n; 3146 cstart = B->cmap->rstart; 3147 cend = B->cmap->rend; 3148 rstart = B->rmap->rstart; 3149 3150 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 3151 o_nnz = d_nnz + m; 3152 3153 #if defined(PETSC_USE_DEBUG) 3154 for (i=0; i<m; i++) { 3155 nnz = Ii[i+1]- Ii[i]; 3156 JJ = J + Ii[i]; 3157 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3158 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3159 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3160 for (j=1; j<nnz; j++) { 3161 if (JJ[j] <= JJ[j-1]) SETERRQ2(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at %D location in column indices",i,j); 3162 } 3163 } 3164 #endif 3165 3166 for (i=0; i<m; i++) { 3167 nnz = Ii[i+1]- Ii[i]; 3168 JJ = J + Ii[i]; 3169 nnz_max = PetscMax(nnz_max,nnz); 3170 for (j=0; j<nnz; j++) { 3171 if (*JJ >= cstart) break; 3172 JJ++; 3173 } 3174 d = 0; 3175 for (; j<nnz; j++) { 3176 if (*JJ++ >= cend) break; 3177 d++; 3178 } 3179 d_nnz[i] = d; 3180 o_nnz[i] = nnz - d; 3181 } 3182 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3183 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 3184 3185 if (v) values = (PetscScalar*)v; 3186 else { 3187 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3188 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3189 } 3190 3191 for (i=0; i<m; i++) { 3192 ii = i + rstart; 3193 nnz = Ii[i+1]- Ii[i]; 3194 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3195 }
3196 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3197 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3198
3199 if (!v) {
3200 ierr = PetscFree(values);CHKERRQ(ierr);
3201 }
3202 PetscFunctionReturn(0);
3203 }
3204 EXTERN_C_END
3205
3206 #undef __FUNCT__
3207 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3208 /*@
3209 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3210 (the default parallel PETSc format).
3211
3212 Collective on MPI_Comm
3213
3214 Input Parameters:
3215 + B - the matrix
3216 . i - the indices into j for the start of each local row (starts with zero)
3217 . j - the column indices for each local row (starts with zero); these must be sorted for each row
3218 - v - optional values in the matrix
3219
3220 Level: developer
3221
3222 Notes:
3223 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3224 thus you CANNOT change the matrix entries by changing the values of v[] after you have
3225 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3226
3227 The i and j indices are 0 based, and the i indices are offsets into the local j array.
3228
3229 The format used for the sparse matrix input is equivalent to a
3230 row-major ordering, i.e., for the following matrix, the input data expected is
3231 as shown:
3232
3233 1 0 0
3234 2 0 3 P0
3235 -------
3236 4 5 6 P1
3237
3238 Process0 [P0]: rows_owned=[0,1]
3239 i = {0,1,3} [size = nrow+1 = 2+1]
3240 j = {0,0,2} [size = nz = 3]
3241 v = {1,2,3} [size = nz = 3]
3242
3243 Process1 [P1]: rows_owned=[2]
3244 i = {0,3} [size = nrow+1 = 1+1]
3245 j = {0,1,2} [size = nz = 3]
3246 v = {4,5,6} [size = nz = 3]
3247
3248 The column indices for each row MUST be sorted.
3249
3250 .keywords: matrix, aij, compressed row, sparse, parallel
3251
3252 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3253 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3254 @*/
3255 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3256 {
3257 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3258
3259 PetscFunctionBegin;
3260 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3261 if (f) {
3262 ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3263 }
3264 PetscFunctionReturn(0);
3265 }
3266
3267 #undef __FUNCT__
3268 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3269 /*@C
3270 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3271 (the default parallel PETSc format). For good matrix assembly performance
3272 the user should preallocate the matrix storage by setting the parameters
3273 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3274 performance can be increased by more than a factor of 50.
3275
3276 Collective on MPI_Comm
3277
3278 Input Parameters:
3279 + A - the matrix
3280 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
3281 (same value is used for all local rows)
3282 . d_nnz - array containing the number of nonzeros in the various rows of the
3283 DIAGONAL portion of the local submatrix (possibly different for each row)
3284 or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3285 The size of this array is equal to the number of local rows, i.e., 'm'.
3286 You must leave room for the diagonal entry even if it is zero.
3287 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
3288 submatrix (same value is used for all local rows).
3289 - o_nnz - array containing the number of nonzeros in the various rows of the
3290 OFF-DIAGONAL portion of the local submatrix (possibly different for
3291 each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3292 structure. The size of this array is equal to the number
3293 of local rows, i.e., 'm'.
3294
3295 If the *_nnz parameter is given then the *_nz parameter is ignored
3296
3297 The AIJ format (also called the Yale sparse matrix format or
3298 compressed row storage (CSR)), is fully compatible with standard Fortran 77
3299 storage. The stored row and column indices begin with zero. See the users manual for details.
3300
3301 The parallel matrix is partitioned such that the first m0 rows belong to
3302 process 0, the next m1 rows belong to process 1, the next m2 rows belong
3303 to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
3304
3305 The DIAGONAL portion of the local submatrix of a processor can be defined
3306 as the submatrix which is obtained by extracting the part corresponding
3307 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3308 first row that belongs to the processor, and r2 is the last row belonging
3309 to this processor. This is a square mxm matrix. The remaining portion
3310 of the local submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3311
3312 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3313
3314 You can call MatGetInfo() to get information on how effective the preallocation was;
3315 for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3316 You can also run with the option -info and look for messages containing the string
3317 malloc to see whether additional memory allocation was needed.
3318
3319 Example usage:
3320
3321 Consider the following 8x8 matrix with 34 non-zero values, that is
3322 assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3323 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3324 as follows:
3325
3326 .vb
3327 1 2 0 | 0 3 0 | 0 4
3328 Proc0 0 5 6 | 7 0 0 | 8 0
3329 9 0 10 | 11 0 0 | 12 0
3330 -------------------------------------
3331 13 0 14 | 15 16 17 | 0 0
3332 Proc1 0 18 0 | 19 20 21 | 0 0
3333 0 0 0 | 22 23 0 | 24 0
3334 -------------------------------------
3335 Proc2 25 26 27 | 0 0 28 | 29 0
3336 30 0 0 | 31 32 33 | 0 34
3337 .ve
3338
3339 This can be represented as a collection of submatrices as:
3340
3341 .vb
3342 A B C
3343 D E F
3344 G H I
3345 .ve
3346
3347 where the submatrices A,B,C are owned by proc0, D,E,F are
3348 owned by proc1, G,H,I are owned by proc2.
3349
3350 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3351 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3352 The 'M','N' parameters are 8,8, and have the same values on all procs.
3353
3354 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3355 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3356 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3357 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
3358 part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ
3359 matrix and [DF] as another SeqAIJ matrix.
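   For instance, with the exact per-row counts derived below for proc1, the call
   might look like this sketch (assuming a matrix A that has already been created
   and given type MPIAIJ; error checking omitted):
.vb
      PetscInt d_nnz[] = {3,3,2},o_nnz[] = {2,1,1};
      MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve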
3360
3361 When d_nz, o_nz parameters are specified, d_nz storage elements are
3362 allocated for every row of the local diagonal submatrix, and o_nz
3363 storage locations are allocated for every row of the OFF-DIAGONAL submat.
3364 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
3365 row in each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3366 In this case, the values of d_nz,o_nz are:
3367 .vb
3368 proc0 : d_nz = 2, o_nz = 2
3369 proc1 : d_nz = 3, o_nz = 2
3370 proc2 : d_nz = 1, o_nz = 4
3371 .ve
3372 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3373 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3374 for proc2, i.e., we are using 12+15+10=37 storage locations to store
3375 34 values.
3376
3377 When d_nnz, o_nnz parameters are specified, the storage is specified
3378 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3379 In the above case the values for d_nnz,o_nnz are:
3380 .vb
3381 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3382 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3383 proc2: d_nnz = [1,1] and o_nnz = [4,4]
3384 .ve
3385 Here the space allocated is the sum of all the above values, i.e., 34, and
3386 hence the preallocation is perfect.
3387
3388 Level: intermediate
3389
3390 .keywords: matrix, aij, compressed row, sparse, parallel
3391
3392 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3393 MPIAIJ, MatGetInfo()
3394 @*/
3395 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3396 {
3397 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3398
3399 PetscFunctionBegin;
3400 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3401 if (f) {
3402 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3403 }
3404 PetscFunctionReturn(0);
3405 }
3406
3407 #undef __FUNCT__
3408 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3409 /*@
3410 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain
3411 the local rows in standard CSR format.
3412
3413 Collective on MPI_Comm
3414
3415 Input Parameters:
3416 + comm - MPI communicator
3417 . m - number of local rows (Cannot be PETSC_DECIDE)
3418 . n - This value should be the same as the local size used in creating the
3419 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3420 calculated if N is given) For square matrices n is almost always m.
3421 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3422 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3423 . i - row indices
3424 . j - column indices
3425 - a - matrix values
3426
3427 Output Parameter:
3428 . mat - the matrix
3429
3430 Level: intermediate
3431
3432 Notes:
3433 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3434 thus you CANNOT change the matrix entries by changing the values of a[] after you have
3435 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3436
3437 The i and j indices are 0 based, and the i indices are offsets into the local j array.
3438
3439 The format used for the sparse matrix input is equivalent to a
3440 row-major ordering,
i.e., for the following matrix, the input data expected is
3441 as shown:
3442
3443 1 0 0
3444 2 0 3 P0
3445 -------
3446 4 5 6 P1
3447
3448 Process0 [P0]: rows_owned=[0,1]
3449 i = {0,1,3} [size = nrow+1 = 2+1]
3450 j = {0,0,2} [size = nz = 3]
3451 v = {1,2,3} [size = nz = 3]
3452
3453 Process1 [P1]: rows_owned=[2]
3454 i = {0,3} [size = nrow+1 = 1+1]
3455 j = {0,1,2} [size = nz = 3]
3456 v = {4,5,6} [size = nz = 3]
3457
3458 .keywords: matrix, aij, compressed row, sparse, parallel
3459
3460 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3461 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3462 @*/
3463 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3464 {
3465 PetscErrorCode ierr;
3466
3467 PetscFunctionBegin;
3468 if (i[0]) {
3469 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3470 }
3471 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3472 ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3473 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3474 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3475 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3476 PetscFunctionReturn(0);
3477 }
3478
3479 #undef __FUNCT__
3480 #define __FUNCT__ "MatCreateMPIAIJ"
3481 /*@C
3482 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3483 (the default parallel PETSc format). For good matrix assembly performance
3484 the user should preallocate the matrix storage by setting the parameters
3485 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3486 performance can be increased by more than a factor of 50.
3487
3488 Collective on MPI_Comm
3489
3490 Input Parameters:
3491 + comm - MPI communicator
3492 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3493 This value should be the same as the local size used in creating the
3494 y vector for the matrix-vector product y = Ax.
3495 . n - This value should be the same as the local size used in creating the
3496 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3497 calculated if N is given) For square matrices n is almost always m.
3498 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3499 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3500 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
3501 (same value is used for all local rows)
3502 . d_nnz - array containing the number of nonzeros in the various rows of the
3503 DIAGONAL portion of the local submatrix (possibly different for each row)
3504 or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3505 The size of this array is equal to the number of local rows, i.e., 'm'.
3506 You must leave room for the diagonal entry even if it is zero.
3507 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
3508 submatrix (same value is used for all local rows).
3509 - o_nnz - array containing the number of nonzeros in the various rows of the
3510 OFF-DIAGONAL portion of the local submatrix (possibly different for
3511 each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3512 structure. The size of this array is equal to the number
3513 of local rows, i.e., 'm'.
3514
3515 Output Parameter:
3516 . A - the matrix
3517
3518 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3519 MatXXXXSetPreallocation() paradigm instead of this routine directly.
3520 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3521
3522 Notes:
3523 If the *_nnz parameter is given then the *_nz parameter is ignored
3524
3525 m,n,M,N parameters specify the size of the matrix, and its partitioning across
3526 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3527 storage requirements for this matrix.
3528
3529 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3530 processor then it must be used on all processors that share the object for
3531 that argument.
3532
3533 The user MUST specify either the local or global matrix dimensions
3534 (possibly both).
3535
3536 The parallel matrix is partitioned across processors such that the
3537 first m0 rows belong to process 0, the next m1 rows belong to
3538 process 1, the next m2 rows belong to process 2, etc., where
3539 m0,m1,m2,... are given by the input parameter 'm', i.e., each processor stores
3540 values corresponding to an [m x N] submatrix.
3541
3542 The columns are logically partitioned with the n0 columns belonging
3543 to the 0th partition, the next n1 columns belonging to the next
3544 partition, etc., where n0,n1,n2,... are given by the input parameter 'n'.
3545
3546 The DIAGONAL portion of the local submatrix on any given processor
3547 is the submatrix corresponding to the rows and columns m,n
3548 of the given processor, i.e., the diagonal submatrix on
3549 process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
3550 etc. The remaining portion of the local submatrix, [m x (N-n)],
3551 constitutes the OFF-DIAGONAL portion. The example below
3552 illustrates this concept.
3553
3554 For a square global matrix we define each processor's diagonal portion
3555 to be its local rows and the corresponding columns (a square submatrix);
3556 each processor's off-diagonal portion encompasses the remainder of the
3557 local matrix (a rectangular submatrix).
3558
3559 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3560
3561 When calling this routine with a single process communicator, a matrix of
3562 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
3563 type of communicator, use the construction mechanism:
3564 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3565
3566 By default, this format uses inodes (identical nodes) when possible.
3567 We search for consecutive rows with the same nonzero structure, thereby
3568 reusing matrix information to achieve increased efficiency.
3569
3570 Options Database Keys:
3571 + -mat_no_inode - Do not use inodes
3572 . -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3573 - -mat_aij_oneindex - Internally use indexing starting at 1
3574 rather than 0. Note that when calling MatSetValues(),
3575 the user still MUST index entries starting at 0!
3576
3577
3578 Example usage:
3579
3580 Consider the following 8x8 matrix with 34 non-zero values, that is
3581 assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3582 proc1 owns 3 rows, proc2 owns 2 rows.
This division can be shown
3583 as follows:
3584
3585 .vb
3586 1 2 0 | 0 3 0 | 0 4
3587 Proc0 0 5 6 | 7 0 0 | 8 0
3588 9 0 10 | 11 0 0 | 12 0
3589 -------------------------------------
3590 13 0 14 | 15 16 17 | 0 0
3591 Proc1 0 18 0 | 19 20 21 | 0 0
3592 0 0 0 | 22 23 0 | 24 0
3593 -------------------------------------
3594 Proc2 25 26 27 | 0 0 28 | 29 0
3595 30 0 0 | 31 32 33 | 0 34
3596 .ve
3597
3598 This can be represented as a collection of submatrices as:
3599
3600 .vb
3601 A B C
3602 D E F
3603 G H I
3604 .ve
3605
3606 where the submatrices A,B,C are owned by proc0, D,E,F are
3607 owned by proc1, G,H,I are owned by proc2.
3608
3609 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3610 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3611 The 'M','N' parameters are 8,8, and have the same values on all procs.
3612
3613 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3614 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3615 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3616 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
3617 part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ
3618 matrix and [DF] as another SeqAIJ matrix.
3619
3620 When d_nz, o_nz parameters are specified, d_nz storage elements are
3621 allocated for every row of the local diagonal submatrix, and o_nz
3622 storage locations are allocated for every row of the OFF-DIAGONAL submat.
3623 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
3624 row in each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3625 In this case, the values of d_nz,o_nz are:
3626 .vb
3627 proc0 : d_nz = 2, o_nz = 2
3628 proc1 : d_nz = 3, o_nz = 2
3629 proc2 : d_nz = 1, o_nz = 4
3630 .ve
3631 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3632 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3633 for proc2, i.e., we are using 12+15+10=37 storage locations to store
3634 34 values.
3635
3636 When d_nnz, o_nnz parameters are specified, the storage is specified
3637 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3638 In the above case the values for d_nnz,o_nnz are:
3639 .vb
3640 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3641 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3642 proc2: d_nnz = [1,1] and o_nnz = [4,4]
3643 .ve
3644 Here the space allocated is the sum of all the above values, i.e., 34, and
3645 hence the preallocation is perfect.
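   A minimal sketch of the recommended MatCreate()/MatSetType()/MatMPIAIJSetPreallocation()
   paradigm mentioned above (d_nnz and o_nnz as in the example; error checking omitted):
.vb
      Mat A;
      MatCreate(comm,&A);
      MatSetSizes(A,m,n,M,N);
      MatSetType(A,MATMPIAIJ);
      MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve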
3646 3647 Level: intermediate 3648 3649 .keywords: matrix, aij, compressed row, sparse, parallel 3650 3651 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3652 MPIAIJ, MatCreateMPIAIJWithArrays() 3653 @*/ 3654 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3655 { 3656 PetscErrorCode ierr; 3657 PetscMPIInt size; 3658 3659 PetscFunctionBegin; 3660 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3661 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3662 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3663 if (size > 1) { 3664 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3665 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3666 } else { 3667 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3668 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3669 } 3670 PetscFunctionReturn(0); 3671 } 3672 3673 #undef __FUNCT__ 3674 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3675 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3676 { 3677 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3678 3679 PetscFunctionBegin; 3680 *Ad = a->A; 3681 *Ao = a->B; 3682 *colmap = a->garray; 3683 PetscFunctionReturn(0); 3684 } 3685 3686 #undef __FUNCT__ 3687 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3688 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3689 { 3690 PetscErrorCode ierr; 3691 PetscInt i; 3692 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3693 3694 PetscFunctionBegin; 3695 if (coloring->ctype == IS_COLORING_GLOBAL) { 3696 ISColoringValue *allcolors,*colors; 3697 ISColoring ocoloring; 3698 3699 /* set coloring for diagonal portion */ 3700 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3701 3702 /* set coloring for off-diagonal portion */ 3703 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3704 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3705 for (i=0; i<a->B->cmap->n; i++) { 3706 colors[i] = allcolors[a->garray[i]]; 3707 } 3708 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3709 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3710 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3711 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3712 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3713 ISColoringValue *colors; 3714 PetscInt *larray; 3715 ISColoring ocoloring; 3716 3717 /* set coloring for diagonal portion */ 3718 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3719 for (i=0; i<a->A->cmap->n; i++) { 3720 larray[i] = i + A->cmap->rstart; 3721 } 3722 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3723 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3724 for (i=0; i<a->A->cmap->n; i++) { 3725 colors[i] = coloring->colors[larray[i]]; 3726 } 3727 ierr = PetscFree(larray);CHKERRQ(ierr); 3728 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3729 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3730 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3731 3732 /* set coloring for off-diagonal portion */ 3733 ierr = 
PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3734 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3735 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3736 for (i=0; i<a->B->cmap->n; i++) {
3737 colors[i] = coloring->colors[larray[i]];
3738 }
3739 ierr = PetscFree(larray);CHKERRQ(ierr);
3740 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3741 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3742 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3743 } else {
3744 SETERRQ1(PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype);
3745 }
3746
3747 PetscFunctionReturn(0);
3748 }
3749
3750 #if defined(PETSC_HAVE_ADIC)
3751 #undef __FUNCT__
3752 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3753 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3754 {
3755 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
3756 PetscErrorCode ierr;
3757
3758 PetscFunctionBegin;
3759 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3760 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3761 PetscFunctionReturn(0);
3762 }
3763 #endif
3764
3765 #undef __FUNCT__
3766 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3767 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3768 {
3769 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
3770 PetscErrorCode ierr;
3771
3772 PetscFunctionBegin;
3773 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3774 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3775 PetscFunctionReturn(0);
3776 }
3777
3778 #undef __FUNCT__
3779 #define __FUNCT__ "MatMerge"
3780 /*@
3781 MatMerge - Creates a single large PETSc matrix by concatenating sequential
3782 matrices from each processor
3783
3784 Collective on MPI_Comm
3785
3786 Input Parameters:
3787 + comm - the communicator the parallel matrix will live on
3788 . inmat - the input sequential matrix (one per process)
3789 . n - number of local columns (or PETSC_DECIDE)
3790 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3791
3792 Output Parameter:
3793 . outmat - the parallel matrix generated
3794
3795 Level: advanced
3796
3797 Notes: The number of columns of the matrix in EACH processor MUST be the same.
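   A minimal usage sketch (each process supplies its own sequential piece; note
   that the code below destroys inmat once it has been merged; error checking omitted):
.vb
      Mat Aloc,Apar;
      /* ... create and fill the local SeqAIJ matrix Aloc on each process ... */
      MatMerge(PETSC_COMM_WORLD,Aloc,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Apar);
      /* Aloc has been destroyed by MatMerge(); Apar is the assembled parallel matrix */
.ve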
3798 3799 @*/ 3800 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3801 { 3802 PetscErrorCode ierr; 3803 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3804 PetscInt *indx; 3805 PetscScalar *values; 3806 3807 PetscFunctionBegin; 3808 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3809 if (scall == MAT_INITIAL_MATRIX){ 3810 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3811 if (n == PETSC_DECIDE){ 3812 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3813 } 3814 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3815 rstart -= m; 3816 3817 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3818 for (i=0;i<m;i++) { 3819 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3820 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3821 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3822 } 3823 /* This routine will ONLY return MPIAIJ type matrix */ 3824 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3825 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3826 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3827 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3828 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3829 3830 } else if (scall == MAT_REUSE_MATRIX){ 3831 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3832 } else { 3833 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3834 } 3835 3836 for (i=0;i<m;i++) { 3837 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3838 Ii = i + rstart; 3839 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3840 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3841 } 3842 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3843 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3844 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3845 3846 PetscFunctionReturn(0); 3847 } 3848 3849 #undef __FUNCT__ 3850 #define __FUNCT__ "MatFileSplit" 3851 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3852 { 3853 PetscErrorCode ierr; 3854 PetscMPIInt rank; 3855 PetscInt m,N,i,rstart,nnz; 3856 size_t len; 3857 const PetscInt *indx; 3858 PetscViewer out; 3859 char *name; 3860 Mat B; 3861 const PetscScalar *values; 3862 3863 PetscFunctionBegin; 3864 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3865 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3866 /* Should this be the type of the diagonal block of A? 
*/
3867 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3868 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3869 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3870 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3871 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3872 for (i=0;i<m;i++) {
3873 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3874 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3875 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3876 }
3877 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3878 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3879
3880 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3881 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3882 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3883 sprintf(name,"%s.%d",outfile,rank);
3884 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3885 ierr = PetscFree(name);CHKERRQ(ierr);
3886 ierr = MatView(B,out);CHKERRQ(ierr);
3887 ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3888 ierr = MatDestroy(B);CHKERRQ(ierr);
3889 PetscFunctionReturn(0);
3890 }
3891
3892 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3893 #undef __FUNCT__
3894 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3895 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3896 {
3897 PetscErrorCode ierr;
3898 Mat_Merge_SeqsToMPI *merge;
3899 PetscContainer container;
3900
3901 PetscFunctionBegin;
3902 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3903 if (container) {
3904 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3905 ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3906 ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3907 ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3908 ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3909 ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3910 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3911 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3912 ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3913 ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3914 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3915 ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
3916
3917 ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
3918 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3919 ierr = PetscFree(merge);CHKERRQ(ierr); /* merge is only valid when the container was found */
3920 }
3921
3922 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3923 PetscFunctionReturn(0);
3924 }
3925
3926 #include "../src/mat/utils/freespace.h"
3927 #include "petscbt.h"
3928
3929 #undef __FUNCT__
3930 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
3931 /*@C
3932 MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
3933 matrices from each processor
3934
3935 Collective on MPI_Comm
3936
3937 Input Parameters:
3938 + comm - the communicator the parallel matrix will live on
3939 . seqmat - the input sequential matrix (one per process)
3940 . m - number of local rows (or PETSC_DECIDE)
3941 . n - number of local columns (or PETSC_DECIDE)
3942 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3943
3944 Output Parameter:
3945 . mpimat - the parallel matrix generated
3946
3947 Level: advanced
3948
3949 Notes:
3950 The dimensions of the sequential matrix in each processor MUST be the same.
3951 The input seqmat is kept in the container "Mat_Merge_SeqsToMPI" and will be
3952 destroyed when mpimat is destroyed.
Call PetscObjectQuery() to access seqmat.
3953 @*/
3954 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3955 {
3956 PetscErrorCode ierr;
3957 MPI_Comm comm=((PetscObject)mpimat)->comm;
3958 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data;
3959 PetscMPIInt size,rank,taga,*len_s;
3960 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
3961 PetscInt proc,m;
3962 PetscInt **buf_ri,**buf_rj;
3963 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3964 PetscInt nrows,**buf_ri_k,**nextrow,**nextai;
3965 MPI_Request *s_waits,*r_waits;
3966 MPI_Status *status;
3967 MatScalar *aa=a->a;
3968 MatScalar **abuf_r,*ba_i;
3969 Mat_Merge_SeqsToMPI *merge;
3970 PetscContainer container;
3971
3972 PetscFunctionBegin;
3973 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3974
3975 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3976 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3977
3978 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3979 if (!container) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"mpimat was not created by MatMerge_SeqsToMPISymbolic()");
3980 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3981
3982 bi = merge->bi;
3983 bj = merge->bj;
3984 buf_ri = merge->buf_ri;
3985 buf_rj = merge->buf_rj;
3986
3987 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3988 owners = merge->rowmap.range;
3989 len_s = merge->len_s;
3990
3991 /* send and recv matrix values */
3992 /*-----------------------------*/
3993 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3994 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3995
3996 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3997 for (proc=0,k=0; proc<size; proc++){
3998 if (!len_s[proc]) continue;
3999 i = owners[proc];
4000 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4001 k++;
4002 }
4003
4004 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4005 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4006 ierr = PetscFree(status);CHKERRQ(ierr);
4007
4008 ierr = PetscFree(s_waits);CHKERRQ(ierr);
4009 ierr = PetscFree(r_waits);CHKERRQ(ierr);
4010
4011 /* insert mat values of mpimat */
4012 /*----------------------------*/
4013 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4014 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4015 nextrow = buf_ri_k + merge->nrecv;
4016 nextai = nextrow + merge->nrecv;
4017
4018 for (k=0; k<merge->nrecv; k++){
4019 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4020 nrows = *(buf_ri_k[k]);
4021 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th received i-structure */
4022 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of the k-th received i-structure */
4023 }
4024
4025 /* set values of ba */
4026 m = merge->rowmap.n;
4027 for (i=0; i<m; i++) {
4028 arow = owners[rank] + i;
4029 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */
4030 bnzi = bi[i+1] - bi[i];
4031 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4032
4033 /* add local non-zero vals of this proc's seqmat into ba */
4034 anzi = ai[arow+1] - ai[arow];
4035 aj = a->j + ai[arow];
4036 aa = a->a + ai[arow];
4037 nextaj = 0;
4038 for (j=0; nextaj<anzi; j++){
4039 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4040 ba_i[j] += aa[nextaj++];
4041 }
4042 }
4043
4044 /* add
received vals into ba */
4045 for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4046 /* i-th row */
4047 if (i == *nextrow[k]) {
4048 anzi = *(nextai[k]+1) - *nextai[k];
4049 aj = buf_rj[k] + *(nextai[k]);
4050 aa = abuf_r[k] + *(nextai[k]);
4051 nextaj = 0;
4052 for (j=0; nextaj<anzi; j++){
4053 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4054 ba_i[j] += aa[nextaj++];
4055 }
4056 }
4057 nextrow[k]++; nextai[k]++;
4058 }
4059 }
4060 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4061 }
4062 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4063 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4064
4065 ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4066 ierr = PetscFree(ba_i);CHKERRQ(ierr);
4067 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4068 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4069 PetscFunctionReturn(0);
4070 }
4071
4072 #undef __FUNCT__
4073 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4074 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4075 {
4076 PetscErrorCode ierr;
4077 Mat B_mpi;
4078 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data;
4079 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4080 PetscInt **buf_rj,**buf_ri,**buf_ri_k;
4081 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4082 PetscInt len,proc,*dnz,*onz;
4083 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4084 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4085 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits;
4086 MPI_Status *status;
4087 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
4088 PetscBT lnkbt;
4089 Mat_Merge_SeqsToMPI *merge;
4090 PetscContainer container;
4091
4092 PetscFunctionBegin;
4093 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4094
4095 /* make sure it is a PETSc comm */
4096 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4097 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4098 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4099
4100 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4101 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4102
4103 /* determine row ownership */
4104 /*---------------------------------------------------------*/
4105 ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
4106 merge->rowmap.n = m;
4107 merge->rowmap.N = M;
4108 merge->rowmap.bs = 1;
4109 ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr);
4110 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4111 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4112
4113 m = merge->rowmap.n;
4114 M = merge->rowmap.N;
4115 owners = merge->rowmap.range;
4116
4117 /* determine the number of messages to send, their lengths */
4118 /*---------------------------------------------------------*/
4119 len_s = merge->len_s;
4120
4121 len = 0; /* length of buf_si[] */
4122 merge->nsend = 0;
4123 for (proc=0; proc<size; proc++){
4124 len_si[proc] = 0;
4125 if (proc == rank){
4126 len_s[proc] = 0;
4127 } else {
4128 len_si[proc] = owners[proc+1] - owners[proc] + 1;
4129 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
4130 }
4131 if (len_s[proc]) {
4132 merge->nsend++;
4133 nrows = 0;
4134 for (i=owners[proc]; i<owners[proc+1]; i++){
4135 if (ai[i+1] > ai[i]) nrows++;
4136 }
4137 len_si[proc] = 2*(nrows+1);
4138 len +=
len_si[proc]; 4139 } 4140 } 4141 4142 /* determine the number and length of messages to receive for ij-structure */ 4143 /*-------------------------------------------------------------------------*/ 4144 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4145 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4146 4147 /* post the Irecv of j-structure */ 4148 /*-------------------------------*/ 4149 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4150 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4151 4152 /* post the Isend of j-structure */ 4153 /*--------------------------------*/ 4154 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 4155 sj_waits = si_waits + merge->nsend; 4156 4157 for (proc=0, k=0; proc<size; proc++){ 4158 if (!len_s[proc]) continue; 4159 i = owners[proc]; 4160 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4161 k++; 4162 } 4163 4164 /* receives and sends of j-structure are complete */ 4165 /*------------------------------------------------*/ 4166 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4167 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4168 4169 /* send and recv i-structure */ 4170 /*---------------------------*/ 4171 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4172 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4173 4174 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4175 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4176 for (proc=0,k=0; proc<size; proc++){ 4177 if (!len_s[proc]) continue; 4178 /* form outgoing message for i-structure: 4179 buf_si[0]: nrows to be sent 4180 [1:nrows]: row index (global) 4181 [nrows+1:2*nrows+1]: i-structure index 4182 */ 4183 /*-------------------------------------------*/ 4184 nrows = len_si[proc]/2 - 1; 4185 buf_si_i = buf_si + nrows+1; 4186 buf_si[0] = nrows; 4187 buf_si_i[0] = 0; 4188 nrows = 0; 4189 for (i=owners[proc]; i<owners[proc+1]; i++){ 4190 anzi = ai[i+1] - ai[i]; 4191 if (anzi) { 4192 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4193 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4194 nrows++; 4195 } 4196 } 4197 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4198 k++; 4199 buf_si += len_si[proc]; 4200 } 4201 4202 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4203 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4204 4205 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4206 for (i=0; i<merge->nrecv; i++){ 4207 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4208 } 4209 4210 ierr = PetscFree(len_si);CHKERRQ(ierr); 4211 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4212 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4213 ierr = PetscFree(si_waits);CHKERRQ(ierr); 4214 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4215 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4216 ierr = PetscFree(status);CHKERRQ(ierr); 4217 4218 /* compute a local seq matrix in each processor */ 4219 /*----------------------------------------------*/ 4220 /* allocate bi array and 
free space for accumulating nonzero column info */
4221 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4222 bi[0] = 0;
4223
4224 /* create and initialize a linked list */
4225 nlnk = N+1;
4226 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4227
4228 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4229 len = 0;
4230 len = ai[owners[rank+1]] - ai[owners[rank]];
4231 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4232 current_space = free_space;
4233
4234 /* determine symbolic info for each local row */
4235 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4236 nextrow = buf_ri_k + merge->nrecv;
4237 nextai = nextrow + merge->nrecv;
4238 for (k=0; k<merge->nrecv; k++){
4239 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4240 nrows = *buf_ri_k[k];
4241 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */
4242 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of the k-th received i-structure */
4243 }
4244
4245 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4246 len = 0;
4247 for (i=0;i<m;i++) {
4248 bnzi = 0;
4249 /* add local non-zero cols of this proc's seqmat into lnk */
4250 arow = owners[rank] + i;
4251 anzi = ai[arow+1] - ai[arow];
4252 aj = a->j + ai[arow];
4253 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4254 bnzi += nlnk;
4255 /* add received col data into lnk */
4256 for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4257 if (i == *nextrow[k]) { /* i-th row */
4258 anzi = *(nextai[k]+1) - *nextai[k];
4259 aj = buf_rj[k] + *nextai[k];
4260 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4261 bnzi += nlnk;
4262 nextrow[k]++; nextai[k]++;
4263 }
4264 }
4265 if (len < bnzi) len = bnzi; /* =max(bnzi) */
4266
4267 /* if free space is not available, make more free space */
4268 if (current_space->local_remaining<bnzi) {
4269 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4270 nspacedouble++;
4271 }
4272 /* copy data into free space, then initialize lnk */
4273 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4274 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4275
4276 current_space->array += bnzi;
4277 current_space->local_used += bnzi;
4278 current_space->local_remaining -= bnzi;
4279
4280 bi[i+1] = bi[i] + bnzi;
4281 }
4282
4283 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4284
4285 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4286 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4287 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4288
4289 /* create symbolic parallel matrix B_mpi */
4290 /*---------------------------------------*/
4291 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4292 if (n==PETSC_DECIDE) {
4293 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4294 } else {
4295 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4296 }
4297 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4298 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4299 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4300
4301 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4302 B_mpi->assembled = PETSC_FALSE;
4303 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4304 merge->bi = bi;
4305 merge->bj = bj;
4306 merge->buf_ri = buf_ri;
4307 merge->buf_rj =
buf_rj;
4308 merge->coi = PETSC_NULL;
4309 merge->coj = PETSC_NULL;
4310 merge->owners_co = PETSC_NULL;
4311
4312 /* attach the supporting struct to B_mpi for reuse */
4313 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4314 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4315 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4316 *mpimat = B_mpi;
4317
4318 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4319 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4320 PetscFunctionReturn(0);
4321 }
4322
4323 #undef __FUNCT__
4324 #define __FUNCT__ "MatMerge_SeqsToMPI"
4325 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4326 {
4327 PetscErrorCode ierr;
4328
4329 PetscFunctionBegin;
4330 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4331 if (scall == MAT_INITIAL_MATRIX){
4332 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4333 }
4334 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4335 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4336 PetscFunctionReturn(0);
4337 }
4338
4339 #undef __FUNCT__
4340 #define __FUNCT__ "MatGetLocalMat"
4341 /*@
4342 MatGetLocalMat - Creates a SeqAIJ matrix by taking all the local rows of A
4343
4344 Not Collective
4345
4346 Input Parameters:
4347 + A - the matrix
4348 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4349
4350 Output Parameter:
4351 . A_loc - the local sequential matrix generated
4352
4353 Level: developer
4354
4355 @*/
4356 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4357 {
4358 PetscErrorCode ierr;
4359 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data;
4360 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4361 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4362 MatScalar *aa=a->a,*ba=b->a,*cam;
4363 PetscScalar *ca;
4364 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4365 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo;
4366
4367 PetscFunctionBegin;
4368 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4369 if (scall == MAT_INITIAL_MATRIX){
4370 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4371 ci[0] = 0;
4372 for (i=0; i<am; i++){
4373 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4374 }
4375 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4376 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4377 k = 0;
4378 for (i=0; i<am; i++) {
4379 ncols_o = bi[i+1] - bi[i];
4380 ncols_d = ai[i+1] - ai[i];
4381 /* off-diagonal portion of A */
4382 for (jo=0; jo<ncols_o; jo++) {
4383 col = cmap[*bj];
4384 if (col >= cstart) break;
4385 cj[k] = col; bj++;
4386 ca[k++] = *ba++;
4387 }
4388 /* diagonal portion of A */
4389 for (j=0; j<ncols_d; j++) {
4390 cj[k] = cstart + *aj++;
4391 ca[k++] = *aa++;
4392 }
4393 /* off-diagonal portion of A */
4394 for (j=jo; j<ncols_o; j++) {
4395 cj[k] = cmap[*bj++];
4396 ca[k++] = *ba++;
4397 }
4398 }
4399 /* put together the new matrix */
4400 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4401 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4402 /* Since these are PETSc arrays, change flags to free them as necessary.
*/
4403 mat = (Mat_SeqAIJ*)(*A_loc)->data;
4404 mat->free_a = PETSC_TRUE;
4405 mat->free_ij = PETSC_TRUE;
4406 mat->nonew = 0;
4407 } else if (scall == MAT_REUSE_MATRIX){
4408 mat=(Mat_SeqAIJ*)(*A_loc)->data;
4409 ci = mat->i; cj = mat->j; cam = mat->a;
4410 for (i=0; i<am; i++) {
4411 /* off-diagonal portion of A */
4412 ncols_o = bi[i+1] - bi[i];
4413 for (jo=0; jo<ncols_o; jo++) {
4414 col = cmap[*bj];
4415 if (col >= cstart) break;
4416 *cam++ = *ba++; bj++;
4417 }
4418 /* diagonal portion of A */
4419 ncols_d = ai[i+1] - ai[i];
4420 for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4421 /* off-diagonal portion of A */
4422 for (j=jo; j<ncols_o; j++) {
4423 *cam++ = *ba++; bj++;
4424 }
4425 }
4426 } else {
4427 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4428 }
4429
4430 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4431 PetscFunctionReturn(0);
4432 }
4433
4434 #undef __FUNCT__
4435 #define __FUNCT__ "MatGetLocalMatCondensed"
4436 /*@C
4437 MatGetLocalMatCondensed - Creates a SeqAIJ matrix from the local rows and the NON-ZERO columns of A
4438
4439 Not Collective
4440
4441 Input Parameters:
4442 + A - the matrix
4443 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4444 - row, col - index sets of rows and columns to extract (or PETSC_NULL)
4445
4446 Output Parameter:
4447 . A_loc - the local sequential matrix generated
4448
4449 Level: developer
4450
4451 @*/
4452 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4453 {
4454 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
4455 PetscErrorCode ierr;
4456 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4457 IS isrowa,iscola;
4458 Mat *aloc;
4459
4460 PetscFunctionBegin;
4461 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4462 if (!row){
4463 start = A->rmap->rstart; end = A->rmap->rend;
4464 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4465 } else {
4466 isrowa = *row;
4467 }
4468 if (!col){
4469 start = A->cmap->rstart;
4470 cmap = a->garray;
4471 nzA = a->A->cmap->n;
4472 nzB = a->B->cmap->n;
4473 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4474 ncols = 0;
4475 for (i=0; i<nzB; i++) {
4476 if (cmap[i] < start) idx[ncols++] = cmap[i];
4477 else break;
4478 }
4479 imark = i;
4480 for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4481 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4482 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4483 ierr = PetscFree(idx);CHKERRQ(ierr);
4484 } else {
4485 iscola = *col;
4486 }
4487 if (scall != MAT_INITIAL_MATRIX){
4488 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4489 aloc[0] = *A_loc;
4490 }
4491 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4492 *A_loc = aloc[0];
4493 ierr = PetscFree(aloc);CHKERRQ(ierr);
4494 if (!row){
4495 ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4496 }
4497 if (!col){
4498 ierr = ISDestroy(iscola);CHKERRQ(ierr);
4499 }
4500 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4501 PetscFunctionReturn(0);
4502 }
4503
4504 #undef __FUNCT__
4505 #define __FUNCT__ "MatGetBrowsOfAcols"
4506 /*@C
4507 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
4508
4509 Collective on Mat
4510
4511 Input Parameters:
4512 + A,B - the matrices in mpiaij format
4513 .
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4514 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4515
4516 Output Parameter:
4517 + rowb, colb - index sets of rows and columns of B to extract
4518 . brstart - row index of B_seq from which the next B->rmap->n rows are taken from B's local rows
4519 - B_seq - the sequential matrix generated
4520
4521 Level: developer
4522
4523 @*/
4524 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4525 {
4526 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
4527 PetscErrorCode ierr;
4528 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4529 IS isrowb,iscolb;
4530 Mat *bseq;
4531
4532 PetscFunctionBegin;
4533 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4534 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4535 }
4536 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4537
4538 if (scall == MAT_INITIAL_MATRIX){
4539 start = A->cmap->rstart;
4540 cmap = a->garray;
4541 nzA = a->A->cmap->n;
4542 nzB = a->B->cmap->n;
4543 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4544 ncols = 0;
4545 for (i=0; i<nzB; i++) { /* row < local row index */
4546 if (cmap[i] < start) idx[ncols++] = cmap[i];
4547 else break;
4548 }
4549 imark = i;
4550 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */
4551 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4552 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4553 ierr = PetscFree(idx);CHKERRQ(ierr);
4554 *brstart = imark;
4555 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4556 } else {
4557 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4558 isrowb = *rowb; iscolb = *colb;
4559 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4560 bseq[0] = *B_seq;
4561 }
4562 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4563 *B_seq = bseq[0];
4564 ierr = PetscFree(bseq);CHKERRQ(ierr);
4565 if (!rowb){
4566 ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4567 } else {
4568 *rowb = isrowb;
4569 }
4570 if (!colb){
4571 ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4572 } else {
4573 *colb = iscolb;
4574 }
4575 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4576 PetscFunctionReturn(0);
4577 }
4578
4579 #undef __FUNCT__
4580 #define __FUNCT__ "MatGetBrowsOfAoCols"
4581 /*@C
4582 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4583 of the OFF-DIAGONAL portion of local A
4584
4585 Collective on Mat
4586
4587 Input Parameters:
4588 + A,B - the matrices in mpiaij format
4589 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4590 .
startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4591 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4592
4593 Output Parameter:
4594 . B_oth - the sequential matrix generated
4595
4596 Level: developer
4597
4598 @*/
4599 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth)
4600 {
4601 VecScatter_MPI_General *gen_to,*gen_from;
4602 PetscErrorCode ierr;
4603 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
4604 Mat_SeqAIJ *b_oth;
4605 VecScatter ctx=a->Mvctx;
4606 MPI_Comm comm=((PetscObject)ctx)->comm;
4607 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4608 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4609 PetscScalar *rvalues,*svalues;
4610 MatScalar *b_otha,*bufa,*bufA;
4611 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4612 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4613 MPI_Status *sstatus,rstatus;
4614 PetscMPIInt jj;
4615 PetscInt *cols,sbs,rbs;
4616 PetscScalar *vals;
4617
4618 PetscFunctionBegin;
4619 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4620 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4621 }
4622 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4623 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4624
4625 gen_to = (VecScatter_MPI_General*)ctx->todata;
4626 gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4627 rvalues = gen_from->values; /* reused here to hold the lengths of the incoming rows */
4628 svalues = gen_to->values; /* reused here to hold the lengths of the outgoing rows */
4629 nrecvs = gen_from->n;
4630 nsends = gen_to->n;
4631
4632 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4633 srow = gen_to->indices; /* local row index to be sent */
4634 sstarts = gen_to->starts;
4635 sprocs = gen_to->procs;
4636 sstatus = gen_to->sstatus;
4637 sbs = gen_to->bs;
4638 rstarts = gen_from->starts;
4639 rprocs = gen_from->procs;
4640 rbs = gen_from->bs;
4641
4642 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4643 if (scall == MAT_INITIAL_MATRIX){
4644 /* i-array */
4645 /*---------*/
4646 /* post receives */
4647 for (i=0; i<nrecvs; i++){
4648 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4649 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4650 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4651 }
4652
4653 /* pack the outgoing message */
4654 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4655 rstartsj = sstartsj + nsends +1;
4656 sstartsj[0] = 0; rstartsj[0] = 0;
4657 len = 0; /* total length of j or a array to be sent */
4658 k = 0;
4659 for (i=0; i<nsends; i++){
4660 rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4661 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4662 for (j=0; j<nrows; j++) {
4663 row = srow[k] + B->rmap->range[rank]; /* global row idx */
4664 for (l=0; l<sbs; l++){
4665 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4666 rowlen[j*sbs+l] = ncols;
4667 len += ncols;
4668 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4669 }
4670 k++;
4671 }
4672 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4673
      sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
    b_othi[0] = 0;
    len = 0; /* total length of the j or a array to be received */
    k   = 0;
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in b_othj and b_otha */
    }

    /* allocate space for the j and a arrays of B_oth */
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++){
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++){
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++){
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++){
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX){
    sstartsj = *startsj;
    rstartsj = sstartsj + nsends + 1;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"scall must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
  }

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++){
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++){
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++){
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
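        /* copy the numerical values of this row into the send buffer */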
        for (l=0; l<ncols; l++){
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj || !bufa_ptr){
      ierr = PetscFree(sstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr); /* the caller did not ask to keep the send buffer */
    } else {
      *startsj  = sstartsj;
      *bufa_ptr = bufa;
    }
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetCommunicationStructs"
/*@C
   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

   Not Collective

   Input Parameter:
.  A - The matrix in mpiaij format

   Output Parameters:
+  lvec - The local vector holding off-process values from the argument to a matrix-vector product
.  colmap - A map from global column index to local index into lvec
-  multScatter - A scatter from the argument of a matrix-vector product to lvec

   Level: developer

@*/
#if defined (PETSC_USE_CTABLE)
PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ *) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
EXTERN_C_END

#include "../src/mat/impls/dense/mpi/mpidense.h"

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
/*
   Computes (B'*A')' since computing A*B directly is untenable

            n            p            p
         (     )      (     )      (     )
       m (  A  )  * n (  B  )  = m (  C  )
         (     )      (     )      (     )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(At);CHKERRQ(ierr);
  ierr = MatDestroy(Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
  ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C = Cmat;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
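
/*
   Example usage (a minimal sketch, not taken from the original source): once
   MatMatMult_MPIDense_MPIAIJ() has been composed on an MPIAIJ matrix in
   MatCreate_MPIAIJ() below, a dense-times-sparse product can be formed through
   the generic interface, assuming A is MATMPIDENSE and B is MATMPIAIJ with
   compatible sizes:

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/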

EXTERN_C_BEGIN
#if defined(PETSC_HAVE_MUMPS)
extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_PASTIX)
extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SPOOLES)
extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
#endif
EXTERN_C_END

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
.  -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJ()
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);

  ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->rmap->bs  = 1;
  B->assembled = PETSC_FALSE;
  B->mapping   = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);

  /* build cache for off-processor entries formed during assembly */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_spooles_C",
                                           "MatGetFactor_mpiaij_spooles",
                                           MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_mumps_C",
                                           "MatGetFactor_mpiaij_mumps",
                                           MatGetFactor_mpiaij_mumps);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_PASTIX)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_pastix_C",
                                           "MatGetFactor_mpiaij_pastix",
                                           MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_superlu_dist_C",
                                           "MatGetFactor_mpiaij_superlu_dist",
                                           MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPIAIJ",
                                           MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPIAIJ",
                                           MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIAIJ",
                                           MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                           "MatIsTranspose_MPIAIJ",
                                           MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                           "MatMPIAIJSetPreallocation_MPIAIJ",
                                           MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
                                           "MatMPIAIJSetPreallocationCSR_MPIAIJ",
                                           MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                           "MatDiagonalScaleLocal_MPIAIJ",
                                           MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
                                           "MatConvert_MPIAIJ_MPICSRPERM",
                                           MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
                                           "MatConvert_MPIAIJ_MPICRL",
                                           MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
                                           "MatMatMult_MPIDense_MPIAIJ",
                                           MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
                                           "MatMatMultSymbolic_MPIDense_MPIAIJ",
                                           MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
                                           "MatMatMultNumeric_MPIDense_MPIAIJ",
                                           MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@
   MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
   and "off-diagonal" part of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
       it calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices for "diagonal" portion of matrix
.  j - column indices, which must be local to this process, i.e. relative to the first column of the "diagonal" portion
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix
.  oj - column indices, which must be global, representing columns of the entire matrix
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
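
       For example, in a 4x4 matrix distributed over two processes, the process owning
       rows 0 and 1 (and hence columns 0 and 1 of its "diagonal" block) could pass its
       local part as follows; this is only an illustrative sketch with made-up values,
       and the other process would pass analogous arrays. Note that j holds local
       column indices while oj holds global ones, as described above

$         PetscInt    i[]  = {0,1,2},  j[]  = {0,1};
$         PetscScalar a[]  = {2.0,2.0};
$         PetscInt    oi[] = {0,1,2},  oj[] = {2,3};
$         PetscScalar oa[] = {-1.0,-1.0};
$         ierr = MatCreateMPIAIJWithSplitArrays(comm,2,2,4,4,i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);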

       The i and j indices are 0 based

       See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
                                                                 PetscInt oi[],PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
  if (i[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (oi[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  }
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;
  maij->donotstash     = PETSC_TRUE;
  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscMapSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
  ierr = PetscMapSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
  ierr = PetscMapSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscMapSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
#undef SETERRQ2
#define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
#undef SETERRQ
#define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat = *mmat;
  PetscInt       m = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  ierr = MatPreallocated(mat);CHKERRQ(ierr);
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) {
SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5153 } 5154 #endif 5155 { 5156 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5157 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5158 PetscTruth roworiented = aij->roworiented; 5159 5160 /* Some Variables required in the macro */ 5161 Mat A = aij->A; 5162 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5163 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5164 MatScalar *aa = a->a; 5165 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5166 Mat B = aij->B; 5167 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5168 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5169 MatScalar *ba = b->a; 5170 5171 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5172 PetscInt nonew = a->nonew; 5173 MatScalar *ap1,*ap2; 5174 5175 PetscFunctionBegin; 5176 for (i=0; i<m; i++) { 5177 if (im[i] < 0) continue; 5178 #if defined(PETSC_USE_DEBUG) 5179 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5180 #endif 5181 if (im[i] >= rstart && im[i] < rend) { 5182 row = im[i] - rstart; 5183 lastcol1 = -1; 5184 rp1 = aj + ai[row]; 5185 ap1 = aa + ai[row]; 5186 rmax1 = aimax[row]; 5187 nrow1 = ailen[row]; 5188 low1 = 0; 5189 high1 = nrow1; 5190 lastcol2 = -1; 5191 rp2 = bj + bi[row]; 5192 ap2 = ba + bi[row]; 5193 rmax2 = bimax[row]; 5194 nrow2 = bilen[row]; 5195 low2 = 0; 5196 high2 = nrow2; 5197 5198 for (j=0; j<n; j++) { 5199 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5200 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5201 if (in[j] >= cstart && in[j] < cend){ 5202 col = in[j] - cstart; 5203 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5204 } else if (in[j] < 0) continue; 5205 #if defined(PETSC_USE_DEBUG) 5206 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 5207 #endif 5208 else { 5209 if (mat->was_assembled) { 5210 if (!aij->colmap) { 5211 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5212 } 5213 #if defined (PETSC_USE_CTABLE) 5214 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5215 col--; 5216 #else 5217 col = aij->colmap[in[j]] - 1; 5218 #endif 5219 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5220 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5221 col = in[j]; 5222 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5223 B = aij->B; 5224 b = (Mat_SeqAIJ*)B->data; 5225 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5226 rp2 = bj + bi[row]; 5227 ap2 = ba + bi[row]; 5228 rmax2 = bimax[row]; 5229 nrow2 = bilen[row]; 5230 low2 = 0; 5231 high2 = nrow2; 5232 bm = aij->B->rmap->n; 5233 ba = b->a; 5234 } 5235 } else col = in[j]; 5236 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5237 } 5238 } 5239 } else { 5240 if (!aij->donotstash) { 5241 if (roworiented) { 5242 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5243 } else { 5244 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5245 } 5246 } 5247 } 5248 }} 5249 PetscFunctionReturnVoid(); 5250 } 5251 EXTERN_C_END 5252 5253