#define PETSCMAT_DLL

#include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "../src/inline/spops.h"

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
  Distributes a SeqAIJ matrix across a set of processes. Code stolen from
  MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

  Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscTruth     aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the numbers of diagonal and off-diagonal nonzeros in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the numbers of diagonal and off-diagonal nonzeros in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];  ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                    ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
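/*
   A minimal usage sketch (hypothetical caller, not part of the library):
   scatter a SeqAIJ matrix held on process 0 across a communicator. The
   names gmat, mlocal, and dmat are assumptions of the sketch; mlocal is the
   number of rows this process is to own, and the mlocal values must sum to
   the global number of rows.

     Mat dmat;
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,mlocal,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
*/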
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is defined this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order-N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
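/*
   Both macros above use the same technique: a bounded binary search over the
   sorted column indices of one row, finished by a short linear scan, then an
   insertion that shifts the tail of the row up one slot. A self-contained
   sketch of the search step (illustrative only; rp, nrow, and col are
   hypothetical stand-ins for rp1/rp2, nrow1/nrow2, and the target column):

     PetscInt low = 0, high = nrow, t, k;
     while (high - low > 5) {          narrow to a run of at most 5 entries
       t = (low + high)/2;
       if (rp[t] > col) high = t;
       else             low  = t;
     }
     for (k = low; k < high; k++) {    linear scan of the short run
       if (rp[k] >= col) break;
     }
     rp[k] == col  =>  update the value in place;
     otherwise     =>  shift rp[k..nrow-1] (and the values) up one and insert.
*/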
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some variables required by the macros */
  Mat            A = aij->A;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar      *aa = a->a;
  PetscTruth     ignorezeroentries = a->ignorezeroentries;
  Mat            B = aij->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar      *ba = b->a;

  PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt       nonew = a->nonew;
  MatScalar      *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
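/*
   Note that MatSetValues_MPIAIJ() above never communicates: entries whose
   rows are owned elsewhere are queued in mat->stash and only sent during the
   assembly routines below. Caller sketch (the indices are hypothetical and
   may refer to rows owned by another process):

     PetscInt    row = 0, col = 0;
     PetscScalar v   = 1.0;
     ierr = MatSetValues(mat,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
*/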
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, so we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ*)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
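/*
   Note on the MPI_Allreduce/MPI_BOR test in MatAssemblyBegin_MPIAIJ() above:
   assuming the InsertMode values of this release (INSERT_VALUES and
   ADD_VALUES are distinct nonzero bit patterns), the bitwise OR over all
   ranks equals (ADD_VALUES|INSERT_VALUES) exactly when some ranks inserted
   while others added, which is the illegal mixed case being trapped. A
   typical caller overlaps other work with the stash communication:

     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ...do other work that does not touch mat...
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/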
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap->range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

     Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  }
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
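/*
   The multiply routines above implement the standard MPIAIJ splitting: with
   A the "diagonal" block (columns owned locally) and B the "off-diagonal"
   block (columns owned elsewhere, renumbered through garray),

     y = A*x_local + B*x_ghost

   where x_ghost = lvec is filled by the Mvctx scatter. Starting the scatter
   before the local product hides most of the communication behind the A*x
   work.
*/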
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the VecScatterEnd() below */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*)Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
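/*
   Caller sketch for the transpose test above (A and B are assumed to be
   assembled matrices of compatible sizes; the tolerance is illustrative):

     PetscTruth f;
     ierr = MatIsTranspose(A,B,1.e-10,&f);CHKERRQ(ierr);

   f is PETSC_TRUE when B equals the transpose of A entrywise to within the
   given tolerance.
*/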
/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec)  {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
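/*
   Caller sketch for MatGetDiagonal_MPIAIJ() defined above; the output vector
   must share the matrix row layout (a sketch under that assumption):

     Vec      d;
     PetscInt m;
     ierr = MatGetLocalSize(A,&m,PETSC_NULL);CHKERRQ(ierr);
     ierr = VecCreateMPI(((PetscObject)A)->comm,m,PETSC_DETERMINE,&d);CHKERRQ(ierr);
     ierr = MatGetDiagonal(A,d);CHKERRQ(ierr);
*/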
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the largest process needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
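/*
   The routine above produces the standard PETSc binary format (header, row
   lengths, column indices, values), so the file can be read back with
   MatLoad(). Caller sketch (the filename is hypothetical):

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(((PetscObject)mat)->comm,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(mat,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/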
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt**)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                     rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                     rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    if (mat->rmap->N > 1024) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead");

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}
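/*
   Caller sketch for the ASCII info paths above (a sketch, assuming the
   standard PETSC_VIEWER_STDOUT_WORLD viewer of this release):

     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
     ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/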
#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1;

  PetscFunctionBegin;
  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       first,local_size,nrows;
  const PetscInt *rows;
  PetscMPIInt    size;
  IS             crowp,growp,irowp,lrowp,lcolp,icolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
  }
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
  ierr = ISSetPermutation(growp);CHKERRQ(ierr);
  if (pcomm!=comm) {
    ierr = ISDestroy(crowp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  /* get the local target indices */
  ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISDestroy(irowp);CHKERRQ(ierr);
  /* the column permutation is so much easier;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
  if (size==1) {
    lcolp = colp;
  } else {
    ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  if (size>1) {
    ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISDestroy(lcolp);CHKERRQ(ierr);
  }
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  /* clean up */
  ierr = ISDestroy(lrowp);CHKERRQ(ierr);
  ierr = ISDestroy(icolp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
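/*
   In MatRelax_MPIAIJ() above each outer iteration performs local sweeps of

     A * x = bb1,   bb1 = bb - B * x_ghost

   i.e. the off-process coupling enters only through the right-hand-side
   update, with x_ghost refreshed by one scatter per outer iteration; this is
   processor-block Jacobi with SOR inside each block, which is why the truly
   parallel SOR case errors out.
*/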
#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
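/*
   Caller sketch for the reduction above (MAT_GLOBAL_SUM sums the five
   reduced fields over all ranks, MAT_GLOBAL_MAX takes their maxima):

     MatInfo info;
     ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(((PetscObject)mat)->comm,"nz used %g allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/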
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
       allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}
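/*
   Caller sketch for the MatGetRow()/MatRestoreRow() pair above; only locally
   owned rows may be queried and every get must be matched by a restore
   before the next get:

     PetscInt          rstart,rend,row,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ...use ncols, cols, vals...
       ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/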
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
  PetscInt cstart=A->cmap->rstart,ncol;
  Mat B;
  MatScalar *array;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");

  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
    ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
    ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++){
      d_nnz[aj[i]]++;
      aj[i] += cstart; /* global col index to be used by MatSetValues() */
    }

    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
    ierr = PetscFree(d_nnz);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* copy over the A part */
  array = Aloc->a;
  row = A->rmap->rstart;
  for (i=0; i<ma; i++) {
    ncol = ai[i+1]-ai[i];
    ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; aj += ncol;
  }
  aj = Aloc->j;
  for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */

  /* copy over the B part */
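  /* Bloc->j holds compressed local column indices; map them through a->garray[]
     to global column numbers before handing them to MatSetValues() */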
  ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
  ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
  array = Bloc->a;
  row = A->rmap->rstart;
  for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat        a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt   s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetBlockSize_MPIAIJ"
PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
  ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat        a,b,c,d;
  PetscTruth flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIAIJ"
PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* Because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different; hence we cannot call
       MatCopy() directly on the two parts. If need be, a copy more efficient than
       MatCopy_Basic() could be provided by first uncompressing the a->B matrices
       and then copying the submatrices. */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  PetscInt     i;
  Mat_MPIAIJ   *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
  PetscBLASInt bnz,one=1;
  Mat_SeqAIJ   *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x = (Mat_SeqAIJ *)xx->A->data;
    y = (Mat_SeqAIJ *)yy->A->data;
    bnz = PetscBLASIntCast(x->nz);
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    bnz = PetscBLASIntCast(x->nz);
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
  } else if (str == SUBSET_NONZERO_PATTERN) {
    ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);

    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    if (y->xtoy && y->XtoY != xx->B) {
      ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
      ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
    }
    if (!y->xtoy) { /* get xtoy */
      ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
      y->XtoY = xx->B;
      ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
    }
    for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
  } else {
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatConjugate_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPIAIJ"
PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIAIJ"
PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#ifdef PETSC_HAVE_PBGL

#include <boost/parallel/mpi/bsp_process_group.hpp>
#include <boost/graph/distributed/ilu_default_graph.hpp>
#include <boost/graph/distributed/ilu_0_block.hpp>
#include <boost/graph/distributed/ilu_preconditioner.hpp>
#include <boost/graph/distributed/petsc/interface.hpp>
#include <boost/multi_array.hpp>
#include <boost/parallel/distributed_property_map.hpp>

#undef __FUNCT__
#define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
/*
  This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
*/
PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
{
  namespace petsc = boost::distributed::petsc;

  namespace graph_dist = boost::graph::distributed;
  using boost::graph::distributed::ilu_default::process_group_type;
  using boost::graph::ilu_permuted;

  PetscTruth     row_identity, col_identity;
  PetscContainer c;
  PetscInt       m, n, M, N;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
  ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
  ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
  if (!row_identity || !col_identity) {
    SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
  }

  process_group_type pg;
  typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
  lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
  lgraph_type& level_graph = *lgraph_p;
  graph_dist::ilu_default::graph_type& graph(level_graph.graph);

  petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
  ilu_permuted(level_graph);

  /* put together the new matrix */
  ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
  ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
  ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
  ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSolve_MPIAIJ"
/*
  This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
*/
PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
{
  namespace graph_dist = boost::graph::distributed;

  typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
  lgraph_type*   lgraph_p;
  PetscContainer c;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
  ierr = VecCopy(b, x);CHKERRQ(ierr);

  PetscScalar* array_x;
  ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
  PetscInt sx;
  ierr = VecGetSize(x, &sx);CHKERRQ(ierr);

  PetscScalar* array_b;
  ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
  PetscInt sb;
  ierr = VecGetSize(b, &sb);CHKERRQ(ierr);

  lgraph_type& level_graph = *lgraph_p;
  graph_dist::ilu_default::graph_type& graph(level_graph.graph);

  typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
  array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]),
                 ref_x(array_x, boost::extents[num_vertices(graph)]);

  typedef boost::iterator_property_map<array_ref_type::iterator,
            boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type;
  gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
               vector_x(ref_x.begin(), get(boost::vertex_index, graph));

  ilu_set_solve(*lgraph_p, vector_b, vector_x);

  PetscFunctionReturn(0);
}
#endif

typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
  PetscInt    nzlocal,nsends,nrecvs;
  PetscMPIInt *send_rank;
  PetscInt    *sbuf_nz,*sbuf_j,**rbuf_j;
  PetscScalar *sbuf_a,**rbuf_a;
  PetscErrorCode (*MatDestroy)(Mat);
} Mat_Redundant;

#undef __FUNCT__
#define __FUNCT__ "PetscContainerDestroy_MatRedundant"
PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
{
  PetscErrorCode ierr;
  Mat_Redundant  *redund=(Mat_Redundant*)ptr;
  PetscInt       i;

  PetscFunctionBegin;
  ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
  ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
  ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
  for (i=0; i<redund->nrecvs; i++){
    ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
    ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
  }
  ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
  ierr = PetscFree(redund);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MatRedundant"
PetscErrorCode MatDestroy_MatRedundant(Mat A)
{
  PetscErrorCode ierr;
  PetscContainer container;
  Mat_Redundant  *redund=PETSC_NULL;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
  }
  A->ops->destroy = redund->MatDestroy;
  ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
  ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
{
  PetscMPIInt    rank,size;
  MPI_Comm       comm=((PetscObject)mat)->comm;
  PetscErrorCode ierr;
  PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
  PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
  PetscInt       *rowrange=mat->rmap->range;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            A=aij->A,B=aij->B,C=*matredundant;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
  PetscScalar    *sbuf_a;
  PetscInt       nzlocal=a->nz+b->nz;
  PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
  PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
  PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
  MatScalar      *aworkA,*aworkB;
  PetscScalar    *vals;
  PetscMPIInt    tag1,tag2,tag3,imdex;
  MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
                 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
  MPI_Status     recv_status,*send_status;
  PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
  PetscInt       **rbuf_j=PETSC_NULL;
  PetscScalar    **rbuf_a=PETSC_NULL;
  Mat_Redundant  *redund=PETSC_NULL;
  PetscContainer container;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
    ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
    ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
    if (container) {
      ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
    }
    if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
Wrong nzlocal"); 2064 2065 nsends = redund->nsends; 2066 nrecvs = redund->nrecvs; 2067 send_rank = redund->send_rank; recv_rank = send_rank + size; 2068 sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends; 2069 sbuf_j = redund->sbuf_j; 2070 sbuf_a = redund->sbuf_a; 2071 rbuf_j = redund->rbuf_j; 2072 rbuf_a = redund->rbuf_a; 2073 } 2074 2075 if (reuse == MAT_INITIAL_MATRIX){ 2076 PetscMPIInt subrank,subsize; 2077 PetscInt nleftover,np_subcomm; 2078 /* get the destination processors' id send_rank, nsends and nrecvs */ 2079 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2080 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2081 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 2082 recv_rank = send_rank + size; 2083 np_subcomm = size/nsubcomm; 2084 nleftover = size - nsubcomm*np_subcomm; 2085 nsends = 0; nrecvs = 0; 2086 for (i=0; i<size; i++){ /* i=rank*/ 2087 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2088 send_rank[nsends] = i; nsends++; 2089 recv_rank[nrecvs++] = i; 2090 } 2091 } 2092 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2093 i = size-nleftover-1; 2094 j = 0; 2095 while (j < nsubcomm - nleftover){ 2096 send_rank[nsends++] = i; 2097 i--; j++; 2098 } 2099 } 2100 2101 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2102 for (i=0; i<nleftover; i++){ 2103 recv_rank[nrecvs++] = size-nleftover+i; 2104 } 2105 } 2106 2107 /* allocate sbuf_j, sbuf_a */ 2108 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2109 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2110 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2111 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2112 2113 /* copy mat's local entries into the buffers */ 2114 if (reuse == MAT_INITIAL_MATRIX){ 2115 rownz_max = 0; 2116 rptr = sbuf_j; 2117 cols = sbuf_j + rend-rstart + 1; 2118 vals = sbuf_a; 2119 rptr[0] = 0; 2120 for (i=0; i<rend-rstart; i++){ 2121 row = i + rstart; 2122 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2123 ncols = nzA + nzB; 2124 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2125 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2126 /* load the column indices for this row into cols */ 2127 lwrite = 0; 2128 for (l=0; l<nzB; l++) { 2129 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2130 vals[lwrite] = aworkB[l]; 2131 cols[lwrite++] = ctmp; 2132 } 2133 } 2134 for (l=0; l<nzA; l++){ 2135 vals[lwrite] = aworkA[l]; 2136 cols[lwrite++] = cstart + cworkA[l]; 2137 } 2138 for (l=0; l<nzB; l++) { 2139 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2140 vals[lwrite] = aworkB[l]; 2141 cols[lwrite++] = ctmp; 2142 } 2143 } 2144 vals += ncols; 2145 cols += ncols; 2146 rptr[i+1] = rptr[i] + ncols; 2147 if (rownz_max < ncols) rownz_max = ncols; 2148 } 2149 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2150 } else { /* only copy matrix values into sbuf_a */ 2151 rptr = sbuf_j; 2152 vals = sbuf_a; 2153 rptr[0] = 0; 2154 for (i=0; i<rend-rstart; i++){ 2155 row = i + rstart; 2156 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2157 ncols = nzA + nzB; 2158 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2159 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2160 lwrite = 0; 2161 for (l=0; l<nzB; l++) { 2162 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2163 } 2164 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2165 for (l=0; l<nzB; l++) { 
        if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
      }
      vals += ncols;
      rptr[i+1] = rptr[i] + ncols;
    }
  } /* end of if (reuse == MAT_INITIAL_MATRIX) */

  /* send nzlocal to others, and recv other's nzlocal */
  /*--------------------------------------------------*/
  if (reuse == MAT_INITIAL_MATRIX){
    ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
    s_waits2 = s_waits3 + nsends;
    s_waits1 = s_waits2 + nsends;
    r_waits1 = s_waits1 + nsends;
    r_waits2 = r_waits1 + nrecvs;
    r_waits3 = r_waits2 + nrecvs;
  } else {
    ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
    r_waits3 = s_waits3 + nsends;
  }

  ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX){
    /* get new tags to keep the communication clean */
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
    ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
    rbuf_nz = sbuf_nz + nsends;

    /* post receives of other's nzlocal */
    for (i=0; i<nrecvs; i++){
      ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
    }
    /* send nzlocal to others */
    for (i=0; i<nsends; i++){
      sbuf_nz[i] = nzlocal;
      ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
    }
    /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
    count = nrecvs;
    while (count) {
      ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
      recv_rank[imdex] = recv_status.MPI_SOURCE;
      /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
      ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);

      i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
      rbuf_nz[imdex] += i + 2;
      ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
      ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
      count--;
    }
    /* wait on sends of nzlocal */
    if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
    /* send mat->i,j to others, and recv from other's */
    /*------------------------------------------------*/
    for (i=0; i<nsends; i++){
      j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
      ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
    }
    /* wait on receives of mat->i,j */
    /*------------------------------*/
    count = nrecvs;
    while (count) {
      ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
      if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_ERR_PLIB,"recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
      count--;
    }
    /* wait on sends of mat->i,j */
    /*---------------------------*/
    if (nsends) {
      ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
    }
  } /* end of if (reuse == MAT_INITIAL_MATRIX) */

  /* post receives, send and receive mat->a */
  /*----------------------------------------*/
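  /* standard nonblocking exchange: receives are posted before the matching
     sends, and MPI_Waitany() drains completions in whatever order they arrive */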
  for (imdex=0; imdex<nrecvs; imdex++) {
    ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
  }
  for (i=0; i<nsends; i++){
    ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
  }
  count = nrecvs;
  while (count) {
    ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
    if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_ERR_PLIB,"recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
    count--;
  }
  if (nsends) {
    ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
  }

  ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);

  /* create redundant matrix */
  /*-------------------------*/
  if (reuse == MAT_INITIAL_MATRIX){
    /* compute rownz_max for preallocation */
    for (imdex=0; imdex<nrecvs; imdex++){
      j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
      rptr = rbuf_j[imdex];
      for (i=0; i<j; i++){
        ncols = rptr[i+1] - rptr[i];
        if (rownz_max < ncols) rownz_max = ncols;
      }
    }

    ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
    ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
    ierr = MatSetFromOptions(C);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
  } else {
    C = *matredundant;
  }

  /* insert local matrix entries */
  rptr = sbuf_j;
  cols = sbuf_j + rend-rstart + 1;
  vals = sbuf_a;
  for (i=0; i<rend-rstart; i++){
    row   = i + rstart;
    ncols = rptr[i+1] - rptr[i];
    ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
    vals += ncols;
    cols += ncols;
  }
  /* insert received matrix entries */
  for (imdex=0; imdex<nrecvs; imdex++){
    rstart = rowrange[recv_rank[imdex]];
    rend   = rowrange[recv_rank[imdex]+1];
    rptr = rbuf_j[imdex];
    cols = rbuf_j[imdex] + rend-rstart + 1;
    vals = rbuf_a[imdex];
    for (i=0; i<rend-rstart; i++){
      row   = i + rstart;
      ncols = rptr[i+1] - rptr[i];
      ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      vals += ncols;
      cols += ncols;
    }
  }
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
  if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
  if (reuse == MAT_INITIAL_MATRIX){
    PetscContainer container;
    *matredundant = C;
    /* create a supporting struct and attach it to C for reuse */
    ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
    ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
    ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
    ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
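
    /* stash the communication pattern and buffers so a later call with
       MAT_REUSE_MATRIX can skip the setup and resend only the values */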
    redund->nzlocal   = nzlocal;
    redund->nsends    = nsends;
    redund->nrecvs    = nrecvs;
    redund->send_rank = send_rank;
    redund->sbuf_nz   = sbuf_nz;
    redund->sbuf_j    = sbuf_j;
    redund->sbuf_a    = sbuf_a;
    redund->rbuf_j    = rbuf_j;
    redund->rbuf_a    = rbuf_a;

    redund->MatDestroy = C->ops->destroy;
    C->ops->destroy    = MatDestroy_MatRedundant;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ  *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt    i,*idxb = 0;
  PetscScalar *va,*vb;
  Vec         vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  if (idxb) {
    ierr = PetscFree(idxb);CHKERRQ(ierr);
  }
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ  *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt    i,*idxb = 0;
  PetscScalar *va,*vb;
  Vec         vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++){
    if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  if (idxb) {
    ierr = PetscFree(idxb);CHKERRQ(ierr);
  }
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMin_MPIAIJ"
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *) A->data;
  PetscInt    n = A->rmap->n;
  PetscInt    cstart = A->cmap->rstart;
  PetscInt    *cmap = mat->garray;
  PetscInt    *diagIdx, *offdiagIdx;
  Vec         diagV, offdiagV;
  PetscScalar *a, *diagA, *offdiagA;
  PetscInt    r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
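  /* merge the two per-block results: keep the winning entry for each row and
     translate its column to global numbering (cstart offset for the diagonal
     block, the garray map for the off-diagonal block) */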
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(diagV);CHKERRQ(ierr);
  ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPIAIJ"
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *) A->data;
  PetscInt    n = A->rmap->n;
  PetscInt    cstart = A->cmap->rstart;
  PetscInt    *cmap = mat->garray;
  PetscInt    *diagIdx, *offdiagIdx;
  Vec         diagV, offdiagV;
  PetscScalar *a, *diagA, *offdiagA;
  PetscInt    r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(diagV);CHKERRQ(ierr);
  ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
/* 4*/ MatMultAdd_MPIAIJ,
       MatMultTranspose_MPIAIJ,
       MatMultTransposeAdd_MPIAIJ,
#ifdef PETSC_HAVE_PBGL
       MatSolve_MPIAIJ,
#else
       0,
#endif
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
/*15*/ MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
/*20*/ MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
/*25*/ MatZeroRows_MPIAIJ,
       0,
#ifdef PETSC_HAVE_PBGL
       0,
#else
       0,
#endif
       0,
       0,
/*30*/ MatSetUpPreallocation_MPIAIJ,
#ifdef PETSC_HAVE_PBGL
       0,
#else
       0,
#endif
       0,
       0,
       0,
/*35*/ MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
/*40*/ MatAXPY_MPIAIJ,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
/*45*/ MatGetRowMax_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       0,
/*50*/ MatSetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
/*55*/ MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       MatPermute_MPIAIJ,
       0,
/*60*/ MatGetSubMatrix_MPIAIJ,
       MatDestroy_MPIAIJ,
       MatView_MPIAIJ,
       0,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ MatGetRowMaxAbs_MPIAIJ,
       MatGetRowMinAbs_MPIAIJ,
       0,
       MatSetColoring_MPIAIJ,
#if defined(PETSC_HAVE_ADIC)
       MatSetValuesAdic_MPIAIJ,
#else
       0,
#endif
       MatSetValuesAdifor_MPIAIJ,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
/*84*/ MatLoad_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
/*90*/ MatMatMult_MPIAIJ_MPIAIJ,
       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
       MatMatMultNumeric_MPIAIJ_MPIAIJ,
       MatPtAP_Basic,
       MatPtAPSymbolic_MPIAIJ,
/*95*/ MatPtAPNumeric_MPIAIJ,
       0,
       0,
       0,
       0,
/*100*/0,
       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
       MatPtAPNumeric_MPIAIJ_MPIAIJ,
       MatConjugate_MPIAIJ,
       0,
/*105*/MatSetValuesRow_MPIAIJ,
       MatRealPart_MPIAIJ,
       MatImaginaryPart_MPIAIJ,
       0,
       0,
/*110*/0,
       MatGetRedundantMatrix_MPIAIJ,
       MatGetRowMin_MPIAIJ,
       0,
       0,
/*115*/MatGetSeqNonzerostructure_MPIAIJ};

/* ----------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#include "petscpc.h"
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ *b;
  PetscErrorCode ierr;
  PetscInt i;

  PetscFunctionBegin;
  B->preallocated = PETSC_TRUE;
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);

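  /* AIJ stores individual scalar entries, so the block size is fixed at 1;
     setting up the maps here makes B->rmap->n and B->cmap->N available for
     the checks and block-matrix creation below */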
  ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr);
  ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr);
  ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
  if (d_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
    }
  }
  b = (Mat_MPIAIJ*)B->data;

  /* Explicitly create 2 MATSEQAIJ matrices. */
  ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
  ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
  ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
  ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
  ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);

  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a = (Mat_MPIAIJ*)mat->data;

  mat->factor       = matin->factor;
  mat->rmap->bs     = matin->rmap->bs;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr);

  ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    PetscInt len;
    len = oldmat->B->cmap->n;
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
{
  Mat         A;
  PetscScalar *vals,*svals;
  MPI_Comm    comm = ((PetscObject)viewer)->comm;
  MPI_Status  status;
  PetscErrorCode ierr;
  PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
  PetscInt    i,nz,j,rstart,rend,mmax;
  PetscInt    header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt    *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
  PetscInt    cend,cstart,n,*rowners;
  int         fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* First process needs enough room for process with most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<=size; i++) {
      mmax = PetscMax(mmax,rowners[i]);
    }
  } else mmax = m;

  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += ourlens[j];
    }
    for (i=1; i<size; i++) {
      ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<rowners[i+1]-rowners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n = N/size + ((N % size) > rank);
    ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(A,type);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);

  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
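    /* walk the flat column/value buffers one local row at a time, advancing
       the global row number jj as we go */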
    for (i=0; i<m; i++) {
      ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = A;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ on each
  process, then the final result by concatenating the local matrices. Writing it
  directly would be much like MatGetSubMatrices_MPIAIJ().
*/
PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt rank,size;
  PetscInt    i,m,n,rstart,row,rend,nz,*cwork,j;
  PetscInt    *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat         *local,M,Mreuse;
  MatScalar   *vwork,*aa;
  MPI_Comm    comm = ((PetscObject)mat)->comm;
  Mat_SeqAIJ  *aij;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
    }

    /* next, compute all the lengths */
    ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij = (Mat_SeqAIJ*)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz  = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart, cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);

  ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr);
  ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr);
  ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
  o_nnz = d_nnz + m;

#if defined(PETSC_USE_DEBUG)
  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    JJ  = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
    if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
    for (j=1; j<nnz; j++) {
      if (JJ[j] <= JJ[j-1]) SETERRQ2(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at %D location in column indices",i,j);
    }
  }
#endif

  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    JJ  = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    for (j=0; j<nnz; j++) {
      if (*JJ >= cstart) break;
      JJ++;
    }
    d = 0;
    for (; j<nnz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
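  /* hand the exact per-row counts to the standard preallocation routine; the
     d_nz/o_nz arguments are 0 because the nnz arrays take precedence */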
MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree(d_nnz);CHKERRQ(ierr);

  if (v) values = (PetscScalar*)v;
  else {
    ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = Ii[i+1] - Ii[i];
    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (!v) {
    ierr = PetscFree(values);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
/*@
   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero); these must be sorted for each row
-  v - optional values in the matrix

   Level: developer

   Notes:
   The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of v[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and the i indices are offsets into the local j array.

   The format used for the sparse matrix input is equivalent to a
   row-major ordering, i.e. for the following matrix, the input data expected is
   as shown:

        1 0 0
        2 0 3   P0
       -------
        4 5 6   P1

     Process0 [P0]: rows_owned=[0,1]
       i = {0,1,3}  [size = nrow+1 = 2+1]
       j = {0,0,2}  [size = nz = 3]
       v = {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
       i = {0,3}    [size = nrow+1 = 1+1]
       j = {0,1,2}  [size = nz = 3]
       v = {4,5,6}  [size = nz = 3]

   The column indices for each row MUST be sorted.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
          MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[],const PetscScalar v[])
{
  PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).
By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)) is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.  See the users manual for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion of
   the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   you can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e. 34, and
   hence the pre-allocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatGetInfo()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithArrays"
/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain
   the local rows in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - this value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and the i indices are offsets into the local j array.

   The format used for the sparse matrix input is equivalent to a
   row-major ordering, i.e. for the following matrix, the input data expected is
   as shown:

        1 0 0
        2 0 3   P0
       -------
        4 5 6   P1

     Process0 [P0]: rows_owned=[0,1]
       i = {0,1,3}  [size = nrow+1 = 2+1]
       j = {0,0,2}  [size = nz = 3]
       v = {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
       i = {0,3}    [size = nrow+1 = 1+1]
       j = {0,1,2}  [size = nz = 3]
       v = {4,5,6}  [size = nz = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - this value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2,... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor, i.e. the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism:
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e. 34, and
   hence the pre-allocation is perfect.
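
   For instance, a sketch of the call proc0 of the example above would make
   (illustrative only; each process passes its own m, n, d_nnz, and o_nnz
   values from the tables above, and error checking is omitted here):

.vb
     Mat      A;
     PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};  /* proc0 values from above */
     MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
     /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
     MatDestroy(A);
.ve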
3638 3639 Level: intermediate 3640 3641 .keywords: matrix, aij, compressed row, sparse, parallel 3642 3643 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3644 MPIAIJ, MatCreateMPIAIJWithArrays() 3645 @*/ 3646 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3647 { 3648 PetscErrorCode ierr; 3649 PetscMPIInt size; 3650 3651 PetscFunctionBegin; 3652 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3653 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3654 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3655 if (size > 1) { 3656 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3657 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3658 } else { 3659 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3660 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3661 } 3662 PetscFunctionReturn(0); 3663 } 3664 3665 #undef __FUNCT__ 3666 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3667 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3668 { 3669 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3670 3671 PetscFunctionBegin; 3672 *Ad = a->A; 3673 *Ao = a->B; 3674 *colmap = a->garray; 3675 PetscFunctionReturn(0); 3676 } 3677 3678 #undef __FUNCT__ 3679 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3680 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3681 { 3682 PetscErrorCode ierr; 3683 PetscInt i; 3684 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3685 3686 PetscFunctionBegin; 3687 if (coloring->ctype == IS_COLORING_GLOBAL) { 3688 ISColoringValue *allcolors,*colors; 3689 ISColoring ocoloring; 3690 3691 /* set coloring for diagonal portion */ 3692 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3693 3694 /* set coloring for off-diagonal portion */ 3695 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3696 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3697 for (i=0; i<a->B->cmap->n; i++) { 3698 colors[i] = allcolors[a->garray[i]]; 3699 } 3700 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3701 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3702 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3703 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3704 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3705 ISColoringValue *colors; 3706 PetscInt *larray; 3707 ISColoring ocoloring; 3708 3709 /* set coloring for diagonal portion */ 3710 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3711 for (i=0; i<a->A->cmap->n; i++) { 3712 larray[i] = i + A->cmap->rstart; 3713 } 3714 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3715 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3716 for (i=0; i<a->A->cmap->n; i++) { 3717 colors[i] = coloring->colors[larray[i]]; 3718 } 3719 ierr = PetscFree(larray);CHKERRQ(ierr); 3720 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3721 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3722 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3723 3724 /* set coloring for off-diagonal portion */ 3725 ierr = 
PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype);
  }

  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ADIC)
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge"
/*@
      MatMerge - Creates a single large PETSc matrix by concatenating the sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    inmat - the input sequential matrix
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    outmat - the parallel matrix generated

    Level: advanced

   Notes: The number of columns of the matrix in EACH processor MUST be the same.
   The input matrix inmat is destroyed by this routine; see the sketch below.
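
   Example usage (a sketch; each process first assembles its own sequential
   matrix, all with the same number of columns):

.vb
     Mat Aloc,Aglobal;
     /* ... each process creates and assembles a SeqAIJ matrix Aloc ... */
     MatMerge(PETSC_COMM_WORLD,Aloc,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Aglobal);
     /* Aloc has been destroyed by MatMerge(); only Aglobal remains valid */
.ve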
3790 3791 @*/ 3792 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3793 { 3794 PetscErrorCode ierr; 3795 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3796 PetscInt *indx; 3797 PetscScalar *values; 3798 3799 PetscFunctionBegin; 3800 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3801 if (scall == MAT_INITIAL_MATRIX){ 3802 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3803 if (n == PETSC_DECIDE){ 3804 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3805 } 3806 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3807 rstart -= m; 3808 3809 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3810 for (i=0;i<m;i++) { 3811 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3812 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3813 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3814 } 3815 /* This routine will ONLY return MPIAIJ type matrix */ 3816 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3817 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3818 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3819 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3820 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3821 3822 } else if (scall == MAT_REUSE_MATRIX){ 3823 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3824 } else { 3825 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3826 } 3827 3828 for (i=0;i<m;i++) { 3829 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3830 Ii = i + rstart; 3831 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3832 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3833 } 3834 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3835 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3836 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3837 3838 PetscFunctionReturn(0); 3839 } 3840 3841 #undef __FUNCT__ 3842 #define __FUNCT__ "MatFileSplit" 3843 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3844 { 3845 PetscErrorCode ierr; 3846 PetscMPIInt rank; 3847 PetscInt m,N,i,rstart,nnz; 3848 size_t len; 3849 const PetscInt *indx; 3850 PetscViewer out; 3851 char *name; 3852 Mat B; 3853 const PetscScalar *values; 3854 3855 PetscFunctionBegin; 3856 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3857 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3858 /* Should this be the type of the diagonal block of A? 
*/
  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
  for (i=0;i<m;i++) {
    ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
    ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
  ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
  ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
  sprintf(name,"%s.%d",outfile,rank);
  ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
  ierr = MatView(B,out);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
  ierr = MatDestroy(B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
{
  PetscErrorCode      ierr;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
    ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->bi);CHKERRQ(ierr);
    ierr = PetscFree(merge->bj);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
    ierr = PetscFree(merge->coi);CHKERRQ(ierr);
    ierr = PetscFree(merge->coj);CHKERRQ(ierr);
    ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
    ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);

    ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
    /* free merge only when it was actually attached; it is never set otherwise */
    ierr = PetscFree(merge);CHKERRQ(ierr);
  }

  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include "../src/mat/utils/freespace.h"
#include "petscbt.h"

#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPINumeric"
/*@C
      MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding the sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix
.    m - number of local rows (or PETSC_DECIDE)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
   destroyed when mpimat is destroyed.
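
   A typical calling sequence is sketched below (illustrative; the wrapper
   MatMerge_SeqsToMPI() follows exactly this pattern). The symbolic routine
   builds the parallel structure once; the numeric routine may then be called
   repeatedly as the numerical values of seqmat change:

.vb
     Mat seqmat,mpimat;
     /* ... each process assembles its sequential matrix seqmat ... */
     MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&mpimat);
     MatMerge_SeqsToMPINumeric(seqmat,mpimat);   /* repeatable */
.ve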
   Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode      ierr;
  MPI_Comm            comm=((PetscObject)mpimat)->comm;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  MatScalar           *aa=a->a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (!container) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"mpimat must have been created by MatMerge_SeqsToMPISymbolic()");
  ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  owners = merge->rowmap.range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr    = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
  ierr    = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow = buf_ri_k + merge->nrecv;
  nextai  = nextrow + merge->nrecv;

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k];                 /* beginning of k-th received i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;             /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  /* set values of ba */
  m = merge->rowmap.n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    for (j=0; nextaj<anzi; j++){
      if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add
received vals into ba */ 4037 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4038 /* i-th row */ 4039 if (i == *nextrow[k]) { 4040 anzi = *(nextai[k]+1) - *nextai[k]; 4041 aj = buf_rj[k] + *(nextai[k]); 4042 aa = abuf_r[k] + *(nextai[k]); 4043 nextaj = 0; 4044 for (j=0; nextaj<anzi; j++){ 4045 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4046 ba_i[j] += aa[nextaj++]; 4047 } 4048 } 4049 nextrow[k]++; nextai[k]++; 4050 } 4051 } 4052 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4053 } 4054 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4055 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4056 4057 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4058 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4059 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 4060 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4061 PetscFunctionReturn(0); 4062 } 4063 4064 #undef __FUNCT__ 4065 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4066 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4067 { 4068 PetscErrorCode ierr; 4069 Mat B_mpi; 4070 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4071 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4072 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4073 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4074 PetscInt len,proc,*dnz,*onz; 4075 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4076 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4077 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4078 MPI_Status *status; 4079 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4080 PetscBT lnkbt; 4081 Mat_Merge_SeqsToMPI *merge; 4082 PetscContainer container; 4083 4084 PetscFunctionBegin; 4085 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4086 4087 /* make sure it is a PETSc comm */ 4088 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4089 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4090 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4091 4092 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4093 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4094 4095 /* determine row ownership */ 4096 /*---------------------------------------------------------*/ 4097 ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 4098 merge->rowmap.n = m; 4099 merge->rowmap.N = M; 4100 merge->rowmap.bs = 1; 4101 ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr); 4102 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4103 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4104 4105 m = merge->rowmap.n; 4106 M = merge->rowmap.N; 4107 owners = merge->rowmap.range; 4108 4109 /* determine the number of messages to send, their lengths */ 4110 /*---------------------------------------------------------*/ 4111 len_s = merge->len_s; 4112 4113 len = 0; /* length of buf_si[] */ 4114 merge->nsend = 0; 4115 for (proc=0; proc<size; proc++){ 4116 len_si[proc] = 0; 4117 if (proc == rank){ 4118 len_s[proc] = 0; 4119 } else { 4120 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4121 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4122 } 4123 if (len_s[proc]) { 4124 merge->nsend++; 4125 nrows = 0; 4126 for (i=owners[proc]; i<owners[proc+1]; i++){ 4127 if (ai[i+1] > ai[i]) nrows++; 4128 } 4129 len_si[proc] = 2*(nrows+1); 4130 len += 
len_si[proc]; 4131 } 4132 } 4133 4134 /* determine the number and length of messages to receive for ij-structure */ 4135 /*-------------------------------------------------------------------------*/ 4136 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4137 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4138 4139 /* post the Irecv of j-structure */ 4140 /*-------------------------------*/ 4141 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4142 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4143 4144 /* post the Isend of j-structure */ 4145 /*--------------------------------*/ 4146 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 4147 sj_waits = si_waits + merge->nsend; 4148 4149 for (proc=0, k=0; proc<size; proc++){ 4150 if (!len_s[proc]) continue; 4151 i = owners[proc]; 4152 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4153 k++; 4154 } 4155 4156 /* receives and sends of j-structure are complete */ 4157 /*------------------------------------------------*/ 4158 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4159 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4160 4161 /* send and recv i-structure */ 4162 /*---------------------------*/ 4163 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4164 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4165 4166 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4167 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4168 for (proc=0,k=0; proc<size; proc++){ 4169 if (!len_s[proc]) continue; 4170 /* form outgoing message for i-structure: 4171 buf_si[0]: nrows to be sent 4172 [1:nrows]: row index (global) 4173 [nrows+1:2*nrows+1]: i-structure index 4174 */ 4175 /*-------------------------------------------*/ 4176 nrows = len_si[proc]/2 - 1; 4177 buf_si_i = buf_si + nrows+1; 4178 buf_si[0] = nrows; 4179 buf_si_i[0] = 0; 4180 nrows = 0; 4181 for (i=owners[proc]; i<owners[proc+1]; i++){ 4182 anzi = ai[i+1] - ai[i]; 4183 if (anzi) { 4184 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4185 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4186 nrows++; 4187 } 4188 } 4189 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4190 k++; 4191 buf_si += len_si[proc]; 4192 } 4193 4194 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4195 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4196 4197 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4198 for (i=0; i<merge->nrecv; i++){ 4199 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4200 } 4201 4202 ierr = PetscFree(len_si);CHKERRQ(ierr); 4203 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4204 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4205 ierr = PetscFree(si_waits);CHKERRQ(ierr); 4206 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4207 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4208 ierr = PetscFree(status);CHKERRQ(ierr); 4209 4210 /* compute a local seq matrix in each processor */ 4211 /*----------------------------------------------*/ 4212 /* allocate bi array and 
free space for accumulating nonzero column info */
  ierr  = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr    = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow = buf_ri_k + merge->nrecv;
  nextai  = nextrow + merge->nrecv;
  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k];                 /* beginning of k-th received i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0;i<m;i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow  = owners[rank] + i;
    anzi  = ai[arow+1] - ai[arow];
    aj    = a->j + ai[arow];
    ierr  = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi  = *(nextai[k]+1) - *nextai[k];
        aj    = buf_rj[k] + *nextai[k];
        ierr  = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj =
buf_rj; 4300 merge->coi = PETSC_NULL; 4301 merge->coj = PETSC_NULL; 4302 merge->owners_co = PETSC_NULL; 4303 4304 /* attach the supporting struct to B_mpi for reuse */ 4305 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4306 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4307 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4308 *mpimat = B_mpi; 4309 4310 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4311 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4312 PetscFunctionReturn(0); 4313 } 4314 4315 #undef __FUNCT__ 4316 #define __FUNCT__ "MatMerge_SeqsToMPI" 4317 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4318 { 4319 PetscErrorCode ierr; 4320 4321 PetscFunctionBegin; 4322 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4323 if (scall == MAT_INITIAL_MATRIX){ 4324 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4325 } 4326 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4327 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4328 PetscFunctionReturn(0); 4329 } 4330 4331 #undef __FUNCT__ 4332 #define __FUNCT__ "MatGetLocalMat" 4333 /*@ 4334 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4335 4336 Not Collective 4337 4338 Input Parameters: 4339 + A - the matrix 4340 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4341 4342 Output Parameter: 4343 . A_loc - the local sequential matrix generated 4344 4345 Level: developer 4346 4347 @*/ 4348 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4349 { 4350 PetscErrorCode ierr; 4351 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4352 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4353 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4354 MatScalar *aa=a->a,*ba=b->a,*cam; 4355 PetscScalar *ca; 4356 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4357 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4358 4359 PetscFunctionBegin; 4360 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4361 if (scall == MAT_INITIAL_MATRIX){ 4362 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4363 ci[0] = 0; 4364 for (i=0; i<am; i++){ 4365 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4366 } 4367 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4368 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4369 k = 0; 4370 for (i=0; i<am; i++) { 4371 ncols_o = bi[i+1] - bi[i]; 4372 ncols_d = ai[i+1] - ai[i]; 4373 /* off-diagonal portion of A */ 4374 for (jo=0; jo<ncols_o; jo++) { 4375 col = cmap[*bj]; 4376 if (col >= cstart) break; 4377 cj[k] = col; bj++; 4378 ca[k++] = *ba++; 4379 } 4380 /* diagonal portion of A */ 4381 for (j=0; j<ncols_d; j++) { 4382 cj[k] = cstart + *aj++; 4383 ca[k++] = *aa++; 4384 } 4385 /* off-diagonal portion of A */ 4386 for (j=jo; j<ncols_o; j++) { 4387 cj[k] = cmap[*bj++]; 4388 ca[k++] = *ba++; 4389 } 4390 } 4391 /* put together the new matrix */ 4392 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4393 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4394 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 4395 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4396 mat->free_a = PETSC_TRUE; 4397 mat->free_ij = PETSC_TRUE; 4398 mat->nonew = 0; 4399 } else if (scall == MAT_REUSE_MATRIX){ 4400 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4401 ci = mat->i; cj = mat->j; cam = mat->a; 4402 for (i=0; i<am; i++) { 4403 /* off-diagonal portion of A */ 4404 ncols_o = bi[i+1] - bi[i]; 4405 for (jo=0; jo<ncols_o; jo++) { 4406 col = cmap[*bj]; 4407 if (col >= cstart) break; 4408 *cam++ = *ba++; bj++; 4409 } 4410 /* diagonal portion of A */ 4411 ncols_d = ai[i+1] - ai[i]; 4412 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4413 /* off-diagonal portion of A */ 4414 for (j=jo; j<ncols_o; j++) { 4415 *cam++ = *ba++; bj++; 4416 } 4417 } 4418 } else { 4419 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4420 } 4421 4422 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4423 PetscFunctionReturn(0); 4424 } 4425 4426 #undef __FUNCT__ 4427 #define __FUNCT__ "MatGetLocalMatCondensed" 4428 /*@C 4429 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4430 4431 Not Collective 4432 4433 Input Parameters: 4434 + A - the matrix 4435 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4436 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4437 4438 Output Parameter: 4439 . A_loc - the local sequential matrix generated 4440 4441 Level: developer 4442 4443 @*/ 4444 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4445 { 4446 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4447 PetscErrorCode ierr; 4448 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4449 IS isrowa,iscola; 4450 Mat *aloc; 4451 4452 PetscFunctionBegin; 4453 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4454 if (!row){ 4455 start = A->rmap->rstart; end = A->rmap->rend; 4456 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4457 } else { 4458 isrowa = *row; 4459 } 4460 if (!col){ 4461 start = A->cmap->rstart; 4462 cmap = a->garray; 4463 nzA = a->A->cmap->n; 4464 nzB = a->B->cmap->n; 4465 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4466 ncols = 0; 4467 for (i=0; i<nzB; i++) { 4468 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4469 else break; 4470 } 4471 imark = i; 4472 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4473 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4474 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4475 ierr = PetscFree(idx);CHKERRQ(ierr); 4476 } else { 4477 iscola = *col; 4478 } 4479 if (scall != MAT_INITIAL_MATRIX){ 4480 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4481 aloc[0] = *A_loc; 4482 } 4483 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4484 *A_loc = aloc[0]; 4485 ierr = PetscFree(aloc);CHKERRQ(ierr); 4486 if (!row){ 4487 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4488 } 4489 if (!col){ 4490 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4491 } 4492 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4493 PetscFunctionReturn(0); 4494 } 4495 4496 #undef __FUNCT__ 4497 #define __FUNCT__ "MatGetBrowsOfAcols" 4498 /*@C 4499 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4500 4501 Collective on Mat 4502 4503 Input Parameters: 4504 + A,B - the matrices in mpiaij format 4505 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4506 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4507 4508 Output Parameter: 4509 + rowb, colb - index sets of rows and columns of B to extract 4510 . brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows 4511 - B_seq - the sequential matrix generated 4512 4513 Level: developer 4514 4515 @*/ 4516 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4517 { 4518 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4519 PetscErrorCode ierr; 4520 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4521 IS isrowb,iscolb; 4522 Mat *bseq; 4523 4524 PetscFunctionBegin; 4525 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4526 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4527 } 4528 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4529 4530 if (scall == MAT_INITIAL_MATRIX){ 4531 start = A->cmap->rstart; 4532 cmap = a->garray; 4533 nzA = a->A->cmap->n; 4534 nzB = a->B->cmap->n; 4535 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4536 ncols = 0; 4537 for (i=0; i<nzB; i++) { /* row < local row index */ 4538 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4539 else break; 4540 } 4541 imark = i; 4542 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4543 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4544 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4545 ierr = PetscFree(idx);CHKERRQ(ierr); 4546 *brstart = imark; 4547 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4548 } else { 4549 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4550 isrowb = *rowb; iscolb = *colb; 4551 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4552 bseq[0] = *B_seq; 4553 } 4554 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4555 *B_seq = bseq[0]; 4556 ierr = PetscFree(bseq);CHKERRQ(ierr); 4557 if (!rowb){ 4558 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4559 } else { 4560 *rowb = isrowb; 4561 } 4562 if (!colb){ 4563 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4564 } else { 4565 *colb = iscolb; 4566 } 4567 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4568 PetscFunctionReturn(0); 4569 } 4570 4571 #undef __FUNCT__ 4572 #define __FUNCT__ "MatGetBrowsOfAoCols" 4573 /*@C 4574 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4575 of the OFF-DIAGONAL portion of local A 4576 4577 Collective on Mat 4578 4579 Input Parameters: 4580 + A,B - the matrices in mpiaij format 4581 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4582 . 
startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4583 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4584 4585 Output Parameter: 4586 + B_oth - the sequential matrix generated 4587 4588 Level: developer 4589 4590 @*/ 4591 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth) 4592 { 4593 VecScatter_MPI_General *gen_to,*gen_from; 4594 PetscErrorCode ierr; 4595 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4596 Mat_SeqAIJ *b_oth; 4597 VecScatter ctx=a->Mvctx; 4598 MPI_Comm comm=((PetscObject)ctx)->comm; 4599 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4600 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4601 PetscScalar *rvalues,*svalues; 4602 MatScalar *b_otha,*bufa,*bufA; 4603 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4604 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4605 MPI_Status *sstatus,rstatus; 4606 PetscMPIInt jj; 4607 PetscInt *cols,sbs,rbs; 4608 PetscScalar *vals; 4609 4610 PetscFunctionBegin; 4611 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4612 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4613 } 4614 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4615 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4616 4617 gen_to = (VecScatter_MPI_General*)ctx->todata; 4618 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4619 rvalues = gen_from->values; /* holds the length of receiving row */ 4620 svalues = gen_to->values; /* holds the length of sending row */ 4621 nrecvs = gen_from->n; 4622 nsends = gen_to->n; 4623 4624 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4625 srow = gen_to->indices; /* local row index to be sent */ 4626 sstarts = gen_to->starts; 4627 sprocs = gen_to->procs; 4628 sstatus = gen_to->sstatus; 4629 sbs = gen_to->bs; 4630 rstarts = gen_from->starts; 4631 rprocs = gen_from->procs; 4632 rbs = gen_from->bs; 4633 4634 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4635 if (scall == MAT_INITIAL_MATRIX){ 4636 /* i-array */ 4637 /*---------*/ 4638 /* post receives */ 4639 for (i=0; i<nrecvs; i++){ 4640 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4641 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4642 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4643 } 4644 4645 /* pack the outgoing message */ 4646 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 4647 rstartsj = sstartsj + nsends +1; 4648 sstartsj[0] = 0; rstartsj[0] = 0; 4649 len = 0; /* total length of j or a array to be sent */ 4650 k = 0; 4651 for (i=0; i<nsends; i++){ 4652 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4653 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4654 for (j=0; j<nrows; j++) { 4655 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4656 for (l=0; l<sbs; l++){ 4657 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4658 rowlen[j*sbs+l] = ncols; 4659 len += ncols; 4660 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4661 } 4662 k++; 4663 } 4664 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4665 
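      /* note: these cumulative per-process offsets are reused below when packing
         the j- and a-arrays: outgoing message i occupies the range
         [sstartsj[i],sstartsj[i+1]) of bufj and bufa */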
      sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
    b_othi[0] = 0;
    len = 0; /* total length of j or a array to be received */
    k   = 0;
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++){
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++){
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++){
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++){
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX){
    sstartsj = *startsj;
    rstartsj = sstartsj + nsends + 1;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"scall must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
  }

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++){
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }
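  /* the receives are posted before the sends are packed below, so incoming a-array
     data can arrive while this process is still filling its own send buffers */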
  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++){
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++){
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++){
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj || !bufa_ptr){
      ierr = PetscFree(sstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
    } else {
      *startsj  = sstartsj;
      *bufa_ptr = bufa;
    }
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetCommunicationStructs"
/*@C
  MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

  Not Collective

  Input Parameter:
. A - The matrix in mpiaij format

  Output Parameters:
+ lvec - The local vector holding off-process values from the argument to a matrix-vector product
. colmap - A map from global column index to local index into lvec
- multScatter - A scatter from the argument of a matrix-vector product to lvec
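
  Example usage (an illustrative sketch; under PETSC_USE_CTABLE the colmap argument is instead a PetscTable*):
$    Vec        lvec;
$    VecScatter scat;
$    PetscInt   *colmap;
$    ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&scat);CHKERRQ(ierr);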

  Level: developer

@*/
#if defined (PETSC_USE_CTABLE)
PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ*) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
EXTERN_C_END

#include "../src/mat/impls/dense/mpi/mpidense.h"

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
/*
    Computes C = A*B as (B'*A')', since forming the MPIDense*MPIAIJ product directly is not supported

              n    p          p
        ( )      ( )        ( )
      m ( A ) *  n ( B ) = m ( C )
        ( )      ( )        ( )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(At);CHKERRQ(ierr);
  ierr = MatDestroy(Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
  ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C = Cmat;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#if defined(PETSC_HAVE_MUMPS)
extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_PASTIX)
extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SPOOLES)
extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
#endif
EXTERN_C_END

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJ()
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);

  ierr          = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data       = (void*)b;
  ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->rmap->bs   = 1;
  B->assembled  = PETSC_FALSE;
  B->mapping    = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);

  /* build cache for off-processor entries formed */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_spooles_C",
                                           "MatGetFactor_mpiaij_spooles",
                                           MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_mumps_C",
                                           "MatGetFactor_mpiaij_mumps",
                                           MatGetFactor_mpiaij_mumps);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_PASTIX)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_pastix_C",
                                           "MatGetFactor_mpiaij_pastix",
                                           MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_superlu_dist_C",
                                           "MatGetFactor_mpiaij_superlu_dist",
                                           MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPIAIJ",
                                           MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPIAIJ",
                                           MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIAIJ",
                                           MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                           "MatIsTranspose_MPIAIJ",
                                           MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                           "MatMPIAIJSetPreallocation_MPIAIJ",
                                           MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
                                           "MatMPIAIJSetPreallocationCSR_MPIAIJ",
                                           MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                           "MatDiagonalScaleLocal_MPIAIJ",
                                           MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
                                           "MatConvert_MPIAIJ_MPICSRPERM",
                                           MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
                                           "MatConvert_MPIAIJ_MPICRL",
                                           MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
                                           "MatMatMult_MPIDense_MPIAIJ",
                                           MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
                                           "MatMatMultSymbolic_MPIDense_MPIAIJ",
                                           MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
                                           "MatMatMultNumeric_MPIDense_MPIAIJ",
                                           MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@
     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" parts of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - number of local columns; this should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it calculated
       if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices for "diagonal" portion of matrix
.  j - column indices for "diagonal" portion, local to this process's diagonal block
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix
.  oj - column indices for "off-diagonal" portion, in global numbering
-  oa - matrix values

   Output Parameter:
.   mat - the matrix

   Level: advanced

   Notes:
       The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc.

       The i and j indices are 0 based

       See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
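
       Example (an illustrative sketch, not taken from the PETSc sources): the 2 by 2 global matrix
$        ( 1 2 )
$        ( 3 4 )
       distributed over two processes, each owning one row and one column, may be created by passing
$        m = n = 1, M = N = 2
$        i = {0,1}, j = {0}, a = {1.0},  oi = {0,1}, oj = {1}, oa = {2.0}   on process 0
$        i = {0,1}, j = {0}, a = {4.0},  oi = {0,1}, oj = {0}, oa = {3.0}   on process 1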

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
                                                                 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (oi[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  }
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;
  maij->donotstash     = PETSC_TRUE;
  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscMapSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
  ierr = PetscMapSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
  ierr = PetscMapSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscMapSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
#undef SETERRQ2
#define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
#undef SETERRQ
#define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  ierr = MatPreallocated(mat);CHKERRQ(ierr);
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) {
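    /* switching between ADD_VALUES and INSERT_VALUES requires an intervening
       call to MatAssemblyBegin()/MatAssemblyEnd() with MAT_FLUSH_ASSEMBLY */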
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  }
#endif
  {
    PetscInt   i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscTruth roworiented = aij->roworiented;

    /* Some variables required by the macros */
    Mat        A = aij->A;
    Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa = a->a;
    PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
    Mat        B = aij->B;
    Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba = b->a;

    PetscInt   *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar  *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          if (in[j] >= cstart && in[j] < cend){
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined (PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private();
                   ba must be refreshed before ap2 is recomputed from it */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          }
        }
      } else {
        if (!aij->donotstash) {
          if (roworiented) {
            if (ignorezeroentries && v[i*n] == 0.0) continue;
            ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
          } else {
            if (ignorezeroentries && v[i] == 0.0) continue;
            ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
EXTERN_C_END