1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 4 #include "../src/inline/spops.h" 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "MatDistribute_MPIAIJ" 8 /* 9 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 10 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 11 12 Only for square matrices 13 */ 14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 15 { 16 PetscMPIInt rank,size; 17 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 18 PetscErrorCode ierr; 19 Mat mat; 20 Mat_SeqAIJ *gmata; 21 PetscMPIInt tag; 22 MPI_Status status; 23 PetscTruth aij; 24 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 25 26 PetscFunctionBegin; 27 CHKMEMQ; 28 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 29 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 30 if (!rank) { 31 ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 32 if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 33 } 34 if (reuse == MAT_INITIAL_MATRIX) { 35 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 36 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 37 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 38 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 39 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 40 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 41 rowners[0] = 0; 42 for (i=2; i<=size; i++) { 43 rowners[i] += rowners[i-1]; 44 } 45 rstart = rowners[rank]; 46 rend = rowners[rank+1]; 47 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 48 if (!rank) { 49 gmata = (Mat_SeqAIJ*) gmat->data; 50 /* send row lengths to all processors */ 51 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 52 for (i=1; i<size; i++) { 53 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 54 } 55 /* determine number diagonal and off-diagonal counts */ 56 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 57 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 58 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 59 jj = 0; 60 for (i=0; i<m; i++) { 61 for (j=0; j<dlens[i]; j++) { 62 if (gmata->j[jj] < rstart) ld[i]++; 63 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 64 jj++; 65 } 66 } 67 /* send column indices to other processes */ 68 for (i=1; i<size; i++) { 69 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 70 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 71 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 72 } 73 74 /* send numerical values to other processes */ 75 for (i=1; i<size; i++) { 76 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 77 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 78 } 79 gmataa = gmata->a; 80 gmataj = gmata->j; 81 82 } else { 83 /* receive row lengths */ 84 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 85 /* receive column indices */ 86 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 87 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 88 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 89 /* determine number diagonal and off-diagonal counts */ 90 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 91 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 92 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 93 jj = 0; 94 for (i=0; i<m; i++) { 95 for (j=0; j<dlens[i]; j++) { 96 if (gmataj[jj] < rstart) ld[i]++; 97 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 98 jj++; 99 } 100 } 101 /* receive numerical values */ 102 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 104 } 105 /* set preallocation */ 106 for (i=0; i<m; i++) { 107 dlens[i] -= olens[i]; 108 } 109 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 110 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 111 112 for (i=0; i<m; i++) { 113 dlens[i] += olens[i]; 114 } 115 cnt = 0; 116 for (i=0; i<m; i++) { 117 row = rstart + i; 118 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 119 cnt += dlens[i]; 120 } 121 if (rank) { 122 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 123 } 124 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 125 ierr = PetscFree(rowners);CHKERRQ(ierr); 126 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 127 *inmat = mat; 128 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 129 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 130 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 131 mat = *inmat; 132 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 133 if (!rank) { 134 /* send numerical values to other processes */ 135 gmata = (Mat_SeqAIJ*) gmat->data; 136 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 137 gmataa = gmata->a; 138 for (i=1; i<size; i++) { 139 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 140 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 141 } 142 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 143 } else { 144 /* receive numerical values from process 0*/ 145 nz = Ad->nz + Ao->nz; 146 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 147 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 148 } 149 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 150 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 151 ad = Ad->a; 152 ao = Ao->a; 153 if (mat->rmap->n) { 154 i = 0; 155 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 156 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 157 } 158 for (i=1; i<mat->rmap->n; i++) { 159 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 160 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 161 } 162 i--; 163 if (mat->rmap->n) { 164 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 165 } 166 if (rank) { 167 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 168 } 169 } 170 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 171 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 172 CHKMEMQ; 173 PetscFunctionReturn(0); 174 } 175 176 /* 177 Local utility routine that creates a mapping from the global column 178 number to the local number in the off-diagonal part of the local 179 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 180 a slightly higher hash table cost; without it it is not scalable (each processor 181 has an order N integer array but is fast to acess. 182 */ 183 #undef __FUNCT__ 184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 186 { 187 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 188 PetscErrorCode ierr; 189 PetscInt n = aij->B->cmap->n,i; 190 191 PetscFunctionBegin; 192 #if defined (PETSC_USE_CTABLE) 193 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 194 for (i=0; i<n; i++){ 195 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 196 } 197 #else 198 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 199 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 200 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 201 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 202 #endif 203 PetscFunctionReturn(0); 204 } 205 206 207 #define CHUNKSIZE 15 208 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 209 { \ 210 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 211 lastcol1 = col;\ 212 while (high1-low1 > 5) { \ 213 t = (low1+high1)/2; \ 214 if (rp1[t] > col) high1 = t; \ 215 else low1 = t; \ 216 } \ 217 for (_i=low1; _i<high1; _i++) { \ 218 if (rp1[_i] > col) break; \ 219 if (rp1[_i] == col) { \ 220 if (addv == ADD_VALUES) ap1[_i] += value; \ 221 else ap1[_i] = value; \ 222 goto a_noinsert; \ 223 } \ 224 } \ 225 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 226 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 227 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 228 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 229 N = nrow1++ - 1; a->nz++; high1++; \ 230 /* shift up all the later entries in this row */ \ 231 for (ii=N; ii>=_i; ii--) { \ 232 rp1[ii+1] = rp1[ii]; \ 233 ap1[ii+1] = ap1[ii]; \ 234 } \ 235 rp1[_i] = col; \ 236 ap1[_i] = value; \ 237 a_noinsert: ; \ 238 ailen[row] = nrow1; \ 239 } 240 241 242 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 243 { \ 244 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 245 lastcol2 = col;\ 246 while (high2-low2 > 5) { \ 247 t = (low2+high2)/2; \ 248 if (rp2[t] > col) high2 = t; \ 249 else low2 = t; \ 250 } \ 251 for (_i=low2; _i<high2; _i++) { \ 252 if (rp2[_i] > col) break; \ 253 if (rp2[_i] == col) { \ 254 if (addv == ADD_VALUES) ap2[_i] += value; \ 255 else ap2[_i] = value; \ 256 goto b_noinsert; \ 257 } \ 258 } \ 259 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 260 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 261 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 262 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 263 N = nrow2++ - 1; b->nz++; high2++; \ 264 /* shift up all the later entries in this row */ \ 265 for (ii=N; ii>=_i; ii--) { \ 266 rp2[ii+1] = rp2[ii]; \ 267 ap2[ii+1] = ap2[ii]; \ 268 } \ 269 rp2[_i] = col; \ 270 ap2[_i] = value; \ 271 b_noinsert: ; \ 272 bilen[row] = nrow2; \ 273 } 274 275 #undef __FUNCT__ 276 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 277 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 278 { 279 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 280 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 281 PetscErrorCode ierr; 282 PetscInt l,*garray = mat->garray,diag; 283 284 PetscFunctionBegin; 285 /* code only works for square matrices A */ 286 287 /* find size of row to the left of the diagonal part */ 288 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 289 row = row - diag; 290 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 291 if (garray[b->j[b->i[row]+l]] > diag) break; 292 } 293 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 294 295 /* diagonal part */ 296 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 297 298 /* right of diagonal part */ 299 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 300 PetscFunctionReturn(0); 301 } 302 303 #undef __FUNCT__ 304 #define __FUNCT__ "MatSetValues_MPIAIJ" 305 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 306 { 307 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 308 PetscScalar value; 309 PetscErrorCode ierr; 310 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 311 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 312 PetscTruth roworiented = aij->roworiented; 313 314 /* Some Variables required in the macro */ 315 Mat A = aij->A; 316 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 317 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 318 MatScalar *aa = a->a; 319 PetscTruth ignorezeroentries = a->ignorezeroentries; 320 Mat B = aij->B; 321 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 322 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 323 MatScalar *ba = b->a; 324 325 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 326 PetscInt nonew = a->nonew; 327 MatScalar *ap1,*ap2; 328 329 PetscFunctionBegin; 330 for (i=0; i<m; i++) { 331 if (im[i] < 0) continue; 332 #if defined(PETSC_USE_DEBUG) 333 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 334 #endif 335 if (im[i] >= rstart && im[i] < rend) { 336 row = im[i] - rstart; 337 lastcol1 = -1; 338 rp1 = aj + ai[row]; 339 ap1 = aa + ai[row]; 340 rmax1 = aimax[row]; 341 nrow1 = ailen[row]; 342 low1 = 0; 343 high1 = nrow1; 344 lastcol2 = -1; 345 rp2 = bj + bi[row]; 346 ap2 = ba + bi[row]; 347 rmax2 = bimax[row]; 348 nrow2 = bilen[row]; 349 low2 = 0; 350 high2 = nrow2; 351 352 for (j=0; j<n; j++) { 353 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 354 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 355 if (in[j] >= cstart && in[j] < cend){ 356 col = in[j] - cstart; 357 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 358 } else if (in[j] < 0) continue; 359 #if defined(PETSC_USE_DEBUG) 360 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 361 #endif 362 else { 363 if (mat->was_assembled) { 364 if (!aij->colmap) { 365 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 366 } 367 #if defined (PETSC_USE_CTABLE) 368 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 369 col--; 370 #else 371 col = aij->colmap[in[j]] - 1; 372 #endif 373 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 374 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 375 col = in[j]; 376 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 377 B = aij->B; 378 b = (Mat_SeqAIJ*)B->data; 379 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 380 rp2 = bj + bi[row]; 381 ap2 = ba + bi[row]; 382 rmax2 = bimax[row]; 383 nrow2 = bilen[row]; 384 low2 = 0; 385 high2 = nrow2; 386 bm = aij->B->rmap->n; 387 ba = b->a; 388 } 389 } else col = in[j]; 390 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 391 } 392 } 393 } else { 394 if (!aij->donotstash) { 395 if (roworiented) { 396 if (ignorezeroentries && v[i*n] == 0.0) continue; 397 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 398 } else { 399 if (ignorezeroentries && v[i] == 0.0) continue; 400 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 401 } 402 } 403 } 404 } 405 PetscFunctionReturn(0); 406 } 407 408 #undef __FUNCT__ 409 #define __FUNCT__ "MatGetValues_MPIAIJ" 410 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 411 { 412 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 413 PetscErrorCode ierr; 414 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 415 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 416 417 PetscFunctionBegin; 418 for (i=0; i<m; i++) { 419 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 420 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 421 if (idxm[i] >= rstart && idxm[i] < rend) { 422 row = idxm[i] - rstart; 423 for (j=0; j<n; j++) { 424 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 425 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 426 if (idxn[j] >= cstart && idxn[j] < cend){ 427 col = idxn[j] - cstart; 428 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 429 } else { 430 if (!aij->colmap) { 431 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 432 } 433 #if defined (PETSC_USE_CTABLE) 434 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 435 col --; 436 #else 437 col = aij->colmap[idxn[j]] - 1; 438 #endif 439 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 440 else { 441 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 442 } 443 } 444 } 445 } else { 446 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 447 } 448 } 449 PetscFunctionReturn(0); 450 } 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 455 { 456 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 457 PetscErrorCode ierr; 458 PetscInt nstash,reallocs; 459 InsertMode addv; 460 461 PetscFunctionBegin; 462 if (aij->donotstash) { 463 PetscFunctionReturn(0); 464 } 465 466 /* make sure all processors are either in INSERTMODE or ADDMODE */ 467 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 468 if (addv == (ADD_VALUES|INSERT_VALUES)) { 469 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 470 } 471 mat->insertmode = addv; /* in case this processor had no cache */ 472 473 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 474 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 475 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 476 PetscFunctionReturn(0); 477 } 478 479 #undef __FUNCT__ 480 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 481 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 482 { 483 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 484 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 485 PetscErrorCode ierr; 486 PetscMPIInt n; 487 PetscInt i,j,rstart,ncols,flg; 488 PetscInt *row,*col; 489 PetscTruth other_disassembled; 490 PetscScalar *val; 491 InsertMode addv = mat->insertmode; 492 493 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 494 PetscFunctionBegin; 495 if (!aij->donotstash) { 496 while (1) { 497 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 498 if (!flg) break; 499 500 for (i=0; i<n;) { 501 /* Now identify the consecutive vals belonging to the same row */ 502 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 503 if (j < n) ncols = j-i; 504 else ncols = n-i; 505 /* Now assemble all these values with a single function call */ 506 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 507 i = j; 508 } 509 } 510 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 511 } 512 a->compressedrow.use = PETSC_FALSE; 513 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 514 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 515 516 /* determine if any processor has disassembled, if so we must 517 also disassemble ourselfs, in order that we may reassemble. */ 518 /* 519 if nonzero structure of submatrix B cannot change then we know that 520 no processor disassembled thus we can skip this stuff 521 */ 522 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 523 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 524 if (mat->was_assembled && !other_disassembled) { 525 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 526 } 527 } 528 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 529 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 530 } 531 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 532 ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 533 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 534 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 535 536 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 537 aij->rowvalues = 0; 538 539 /* used by MatAXPY() */ 540 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 541 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 542 543 PetscFunctionReturn(0); 544 } 545 546 #undef __FUNCT__ 547 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 548 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 549 { 550 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 551 PetscErrorCode ierr; 552 553 PetscFunctionBegin; 554 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 555 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 556 PetscFunctionReturn(0); 557 } 558 559 #undef __FUNCT__ 560 #define __FUNCT__ "MatZeroRows_MPIAIJ" 561 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 562 { 563 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 564 PetscErrorCode ierr; 565 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 566 PetscInt i,*owners = A->rmap->range; 567 PetscInt *nprocs,j,idx,nsends,row; 568 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 569 PetscInt *rvalues,count,base,slen,*source; 570 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 571 MPI_Comm comm = ((PetscObject)A)->comm; 572 MPI_Request *send_waits,*recv_waits; 573 MPI_Status recv_status,*send_status; 574 #if defined(PETSC_DEBUG) 575 PetscTruth found = PETSC_FALSE; 576 #endif 577 578 PetscFunctionBegin; 579 /* first count number of contributors to each processor */ 580 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 581 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 582 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 583 j = 0; 584 for (i=0; i<N; i++) { 585 if (lastidx > (idx = rows[i])) j = 0; 586 lastidx = idx; 587 for (; j<size; j++) { 588 if (idx >= owners[j] && idx < owners[j+1]) { 589 nprocs[2*j]++; 590 nprocs[2*j+1] = 1; 591 owner[i] = j; 592 #if defined(PETSC_DEBUG) 593 found = PETSC_TRUE; 594 #endif 595 break; 596 } 597 } 598 #if defined(PETSC_DEBUG) 599 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 600 found = PETSC_FALSE; 601 #endif 602 } 603 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 604 605 /* inform other processors of number of messages and max length*/ 606 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 607 608 /* post receives: */ 609 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 610 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 611 for (i=0; i<nrecvs; i++) { 612 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 613 } 614 615 /* do sends: 616 1) starts[i] gives the starting index in svalues for stuff going to 617 the ith processor 618 */ 619 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 620 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 621 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 622 starts[0] = 0; 623 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 624 for (i=0; i<N; i++) { 625 svalues[starts[owner[i]]++] = rows[i]; 626 } 627 628 starts[0] = 0; 629 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 630 count = 0; 631 for (i=0; i<size; i++) { 632 if (nprocs[2*i+1]) { 633 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 634 } 635 } 636 ierr = PetscFree(starts);CHKERRQ(ierr); 637 638 base = owners[rank]; 639 640 /* wait on receives */ 641 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 642 source = lens + nrecvs; 643 count = nrecvs; slen = 0; 644 while (count) { 645 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 646 /* unpack receives into our local space */ 647 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 648 source[imdex] = recv_status.MPI_SOURCE; 649 lens[imdex] = n; 650 slen += n; 651 count--; 652 } 653 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 654 655 /* move the data into the send scatter */ 656 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 657 count = 0; 658 for (i=0; i<nrecvs; i++) { 659 values = rvalues + i*nmax; 660 for (j=0; j<lens[i]; j++) { 661 lrows[count++] = values[j] - base; 662 } 663 } 664 ierr = PetscFree(rvalues);CHKERRQ(ierr); 665 ierr = PetscFree(lens);CHKERRQ(ierr); 666 ierr = PetscFree(owner);CHKERRQ(ierr); 667 ierr = PetscFree(nprocs);CHKERRQ(ierr); 668 669 /* actually zap the local rows */ 670 /* 671 Zero the required rows. If the "diagonal block" of the matrix 672 is square and the user wishes to set the diagonal we use separate 673 code so that MatSetValues() is not called for each diagonal allocating 674 new memory, thus calling lots of mallocs and slowing things down. 675 676 Contributed by: Matthew Knepley 677 */ 678 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 679 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 680 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 681 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 682 } else if (diag != 0.0) { 683 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 684 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 685 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 686 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 687 } 688 for (i = 0; i < slen; i++) { 689 row = lrows[i] + rstart; 690 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 691 } 692 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 693 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 694 } else { 695 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 696 } 697 ierr = PetscFree(lrows);CHKERRQ(ierr); 698 699 /* wait on sends */ 700 if (nsends) { 701 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 702 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 703 ierr = PetscFree(send_status);CHKERRQ(ierr); 704 } 705 ierr = PetscFree(send_waits);CHKERRQ(ierr); 706 ierr = PetscFree(svalues);CHKERRQ(ierr); 707 708 PetscFunctionReturn(0); 709 } 710 711 #undef __FUNCT__ 712 #define __FUNCT__ "MatMult_MPIAIJ" 713 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 714 { 715 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 716 PetscErrorCode ierr; 717 PetscInt nt; 718 719 PetscFunctionBegin; 720 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 721 if (nt != A->cmap->n) { 722 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 723 } 724 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 725 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 726 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 727 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 728 PetscFunctionReturn(0); 729 } 730 731 #undef __FUNCT__ 732 #define __FUNCT__ "MatMultAdd_MPIAIJ" 733 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 734 { 735 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 736 PetscErrorCode ierr; 737 738 PetscFunctionBegin; 739 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 740 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 741 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 742 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 743 PetscFunctionReturn(0); 744 } 745 746 #undef __FUNCT__ 747 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 748 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 749 { 750 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 751 PetscErrorCode ierr; 752 PetscTruth merged; 753 754 PetscFunctionBegin; 755 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 756 /* do nondiagonal part */ 757 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 758 if (!merged) { 759 /* send it on its way */ 760 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 761 /* do local part */ 762 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 763 /* receive remote parts: note this assumes the values are not actually */ 764 /* added in yy until the next line, */ 765 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 766 } else { 767 /* do local part */ 768 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 769 /* send it on its way */ 770 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 771 /* values actually were received in the Begin() but we need to call this nop */ 772 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 773 } 774 PetscFunctionReturn(0); 775 } 776 777 EXTERN_C_BEGIN 778 #undef __FUNCT__ 779 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 780 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 781 { 782 MPI_Comm comm; 783 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 784 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 785 IS Me,Notme; 786 PetscErrorCode ierr; 787 PetscInt M,N,first,last,*notme,i; 788 PetscMPIInt size; 789 790 PetscFunctionBegin; 791 792 /* Easy test: symmetric diagonal block */ 793 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 794 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 795 if (!*f) PetscFunctionReturn(0); 796 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 797 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 798 if (size == 1) PetscFunctionReturn(0); 799 800 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 801 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 802 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 803 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 804 for (i=0; i<first; i++) notme[i] = i; 805 for (i=last; i<M; i++) notme[i-last+first] = i; 806 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 807 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 808 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 809 Aoff = Aoffs[0]; 810 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 811 Boff = Boffs[0]; 812 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 813 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 814 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 815 ierr = ISDestroy(Me);CHKERRQ(ierr); 816 ierr = ISDestroy(Notme);CHKERRQ(ierr); 817 818 PetscFunctionReturn(0); 819 } 820 EXTERN_C_END 821 822 #undef __FUNCT__ 823 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 824 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 825 { 826 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 827 PetscErrorCode ierr; 828 829 PetscFunctionBegin; 830 /* do nondiagonal part */ 831 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 832 /* send it on its way */ 833 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 834 /* do local part */ 835 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 836 /* receive remote parts */ 837 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 838 PetscFunctionReturn(0); 839 } 840 841 /* 842 This only works correctly for square matrices where the subblock A->A is the 843 diagonal block 844 */ 845 #undef __FUNCT__ 846 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 847 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 848 { 849 PetscErrorCode ierr; 850 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 851 852 PetscFunctionBegin; 853 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 854 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) { 855 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 856 } 857 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 858 PetscFunctionReturn(0); 859 } 860 861 #undef __FUNCT__ 862 #define __FUNCT__ "MatScale_MPIAIJ" 863 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 864 { 865 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 866 PetscErrorCode ierr; 867 868 PetscFunctionBegin; 869 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 870 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 871 PetscFunctionReturn(0); 872 } 873 874 #undef __FUNCT__ 875 #define __FUNCT__ "MatDestroy_MPIAIJ" 876 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 877 { 878 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 879 PetscErrorCode ierr; 880 881 PetscFunctionBegin; 882 #if defined(PETSC_USE_LOG) 883 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 884 #endif 885 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 886 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 887 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 888 #if defined (PETSC_USE_CTABLE) 889 if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);} 890 #else 891 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 892 #endif 893 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 894 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 895 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 896 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 897 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 898 ierr = PetscFree(aij);CHKERRQ(ierr); 899 900 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 901 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 902 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 903 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 904 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 905 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 906 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 907 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 908 PetscFunctionReturn(0); 909 } 910 911 #undef __FUNCT__ 912 #define __FUNCT__ "MatView_MPIAIJ_Binary" 913 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 914 { 915 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 916 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 917 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 918 PetscErrorCode ierr; 919 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 920 int fd; 921 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 922 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 923 PetscScalar *column_values; 924 925 PetscFunctionBegin; 926 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 927 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 928 nz = A->nz + B->nz; 929 if (!rank) { 930 header[0] = MAT_FILE_COOKIE; 931 header[1] = mat->rmap->N; 932 header[2] = mat->cmap->N; 933 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 934 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 935 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 936 /* get largest number of rows any processor has */ 937 rlen = mat->rmap->n; 938 range = mat->rmap->range; 939 for (i=1; i<size; i++) { 940 rlen = PetscMax(rlen,range[i+1] - range[i]); 941 } 942 } else { 943 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 944 rlen = mat->rmap->n; 945 } 946 947 /* load up the local row counts */ 948 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 949 for (i=0; i<mat->rmap->n; i++) { 950 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 951 } 952 953 /* store the row lengths to the file */ 954 if (!rank) { 955 MPI_Status status; 956 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 957 for (i=1; i<size; i++) { 958 rlen = range[i+1] - range[i]; 959 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 960 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 961 } 962 } else { 963 ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 964 } 965 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 966 967 /* load up the local column indices */ 968 nzmax = nz; /* )th processor needs space a largest processor needs */ 969 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 970 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 971 cnt = 0; 972 for (i=0; i<mat->rmap->n; i++) { 973 for (j=B->i[i]; j<B->i[i+1]; j++) { 974 if ( (col = garray[B->j[j]]) > cstart) break; 975 column_indices[cnt++] = col; 976 } 977 for (k=A->i[i]; k<A->i[i+1]; k++) { 978 column_indices[cnt++] = A->j[k] + cstart; 979 } 980 for (; j<B->i[i+1]; j++) { 981 column_indices[cnt++] = garray[B->j[j]]; 982 } 983 } 984 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 985 986 /* store the column indices to the file */ 987 if (!rank) { 988 MPI_Status status; 989 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 990 for (i=1; i<size; i++) { 991 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 992 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 993 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 994 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 995 } 996 } else { 997 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 998 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 999 } 1000 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1001 1002 /* load up the local column values */ 1003 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1004 cnt = 0; 1005 for (i=0; i<mat->rmap->n; i++) { 1006 for (j=B->i[i]; j<B->i[i+1]; j++) { 1007 if ( garray[B->j[j]] > cstart) break; 1008 column_values[cnt++] = B->a[j]; 1009 } 1010 for (k=A->i[i]; k<A->i[i+1]; k++) { 1011 column_values[cnt++] = A->a[k]; 1012 } 1013 for (; j<B->i[i+1]; j++) { 1014 column_values[cnt++] = B->a[j]; 1015 } 1016 } 1017 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1018 1019 /* store the column values to the file */ 1020 if (!rank) { 1021 MPI_Status status; 1022 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1023 for (i=1; i<size; i++) { 1024 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1025 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1026 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1027 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1028 } 1029 } else { 1030 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1031 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1032 } 1033 ierr = PetscFree(column_values);CHKERRQ(ierr); 1034 PetscFunctionReturn(0); 1035 } 1036 1037 #undef __FUNCT__ 1038 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1039 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1040 { 1041 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1042 PetscErrorCode ierr; 1043 PetscMPIInt rank = aij->rank,size = aij->size; 1044 PetscTruth isdraw,iascii,isbinary; 1045 PetscViewer sviewer; 1046 PetscViewerFormat format; 1047 1048 PetscFunctionBegin; 1049 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1050 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1051 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1052 if (iascii) { 1053 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1054 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1055 MatInfo info; 1056 PetscTruth inodes; 1057 1058 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1059 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1060 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1061 if (!inodes) { 1062 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1063 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1064 } else { 1065 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1066 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1067 } 1068 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1069 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1070 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1071 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1072 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1073 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1074 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1075 PetscFunctionReturn(0); 1076 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1077 PetscInt inodecount,inodelimit,*inodes; 1078 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1079 if (inodes) { 1080 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1081 } else { 1082 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1083 } 1084 PetscFunctionReturn(0); 1085 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1086 PetscFunctionReturn(0); 1087 } 1088 } else if (isbinary) { 1089 if (size == 1) { 1090 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1091 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1092 } else { 1093 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1094 } 1095 PetscFunctionReturn(0); 1096 } else if (isdraw) { 1097 PetscDraw draw; 1098 PetscTruth isnull; 1099 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1100 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1101 } 1102 1103 if (size == 1) { 1104 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1105 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1106 } else { 1107 /* assemble the entire matrix onto first processor. */ 1108 Mat A; 1109 Mat_SeqAIJ *Aloc; 1110 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1111 MatScalar *a; 1112 1113 if (mat->rmap->N > 1024) { 1114 PetscTruth flg; 1115 1116 ierr = PetscOptionsHasName(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg);CHKERRQ(ierr); 1117 if (!flg) { 1118 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 512 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1119 } 1120 } 1121 1122 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1123 if (!rank) { 1124 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1125 } else { 1126 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1127 } 1128 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1129 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1130 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1131 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1132 1133 /* copy over the A part */ 1134 Aloc = (Mat_SeqAIJ*)aij->A->data; 1135 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1136 row = mat->rmap->rstart; 1137 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1138 for (i=0; i<m; i++) { 1139 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1140 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1141 } 1142 aj = Aloc->j; 1143 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1144 1145 /* copy over the B part */ 1146 Aloc = (Mat_SeqAIJ*)aij->B->data; 1147 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1148 row = mat->rmap->rstart; 1149 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1150 ct = cols; 1151 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1152 for (i=0; i<m; i++) { 1153 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1154 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1155 } 1156 ierr = PetscFree(ct);CHKERRQ(ierr); 1157 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1158 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1159 /* 1160 Everyone has to call to draw the matrix since the graphics waits are 1161 synchronized across all processors that share the PetscDraw object 1162 */ 1163 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1164 if (!rank) { 1165 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1166 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1167 } 1168 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1169 ierr = MatDestroy(A);CHKERRQ(ierr); 1170 } 1171 PetscFunctionReturn(0); 1172 } 1173 1174 #undef __FUNCT__ 1175 #define __FUNCT__ "MatView_MPIAIJ" 1176 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1177 { 1178 PetscErrorCode ierr; 1179 PetscTruth iascii,isdraw,issocket,isbinary; 1180 1181 PetscFunctionBegin; 1182 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1183 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1184 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1185 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1186 if (iascii || isdraw || isbinary || issocket) { 1187 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1188 } else { 1189 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1190 } 1191 PetscFunctionReturn(0); 1192 } 1193 1194 #undef __FUNCT__ 1195 #define __FUNCT__ "MatRelax_MPIAIJ" 1196 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1197 { 1198 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1199 PetscErrorCode ierr; 1200 Vec bb1; 1201 1202 PetscFunctionBegin; 1203 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1204 1205 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1206 if (flag & SOR_ZERO_INITIAL_GUESS) { 1207 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1208 its--; 1209 } 1210 1211 while (its--) { 1212 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1213 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1214 1215 /* update rhs: bb1 = bb - B*x */ 1216 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1217 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1218 1219 /* local sweep */ 1220 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 1221 } 1222 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1223 if (flag & SOR_ZERO_INITIAL_GUESS) { 1224 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1225 its--; 1226 } 1227 while (its--) { 1228 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1229 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1230 1231 /* update rhs: bb1 = bb - B*x */ 1232 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1233 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1234 1235 /* local sweep */ 1236 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1237 } 1238 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1239 if (flag & SOR_ZERO_INITIAL_GUESS) { 1240 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1241 its--; 1242 } 1243 while (its--) { 1244 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1245 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1246 1247 /* update rhs: bb1 = bb - B*x */ 1248 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1249 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1250 1251 /* local sweep */ 1252 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1253 } 1254 } else { 1255 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1256 } 1257 1258 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1259 PetscFunctionReturn(0); 1260 } 1261 1262 #undef __FUNCT__ 1263 #define __FUNCT__ "MatPermute_MPIAIJ" 1264 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1265 { 1266 MPI_Comm comm,pcomm; 1267 PetscInt first,local_size,nrows; 1268 const PetscInt *rows; 1269 int ntids; 1270 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1271 PetscErrorCode ierr; 1272 1273 PetscFunctionBegin; 1274 ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr); 1275 /* make a collective version of 'rowp' */ 1276 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr); 1277 if (pcomm==comm) { 1278 crowp = rowp; 1279 } else { 1280 ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr); 1281 ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr); 1282 ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr); 1283 ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr); 1284 } 1285 /* collect the global row permutation and invert it */ 1286 ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr); 1287 ierr = ISSetPermutation(growp); CHKERRQ(ierr); 1288 if (pcomm!=comm) { 1289 ierr = ISDestroy(crowp); CHKERRQ(ierr); 1290 } 1291 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1292 /* get the local target indices */ 1293 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr); 1294 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr); 1295 ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr); 1296 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr); 1297 ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr); 1298 ierr = ISDestroy(irowp); CHKERRQ(ierr); 1299 /* the column permutation is so much easier; 1300 make a local version of 'colp' and invert it */ 1301 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr); 1302 ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr); 1303 if (ntids==1) { 1304 lcolp = colp; 1305 } else { 1306 ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr); 1307 ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr); 1308 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr); 1309 } 1310 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr); 1311 ierr = ISSetPermutation(lcolp); CHKERRQ(ierr); 1312 if (ntids>1) { 1313 ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr); 1314 ierr = ISDestroy(lcolp); CHKERRQ(ierr); 1315 } 1316 /* now we just get the submatrix */ 1317 ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr); 1318 /* clean up */ 1319 ierr = ISDestroy(lrowp); CHKERRQ(ierr); 1320 ierr = ISDestroy(icolp); CHKERRQ(ierr); 1321 PetscFunctionReturn(0); 1322 } 1323 1324 #undef __FUNCT__ 1325 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1326 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1327 { 1328 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1329 Mat A = mat->A,B = mat->B; 1330 PetscErrorCode ierr; 1331 PetscReal isend[5],irecv[5]; 1332 1333 PetscFunctionBegin; 1334 info->block_size = 1.0; 1335 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1336 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1337 isend[3] = info->memory; isend[4] = info->mallocs; 1338 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1339 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1340 isend[3] += info->memory; isend[4] += info->mallocs; 1341 if (flag == MAT_LOCAL) { 1342 info->nz_used = isend[0]; 1343 info->nz_allocated = isend[1]; 1344 info->nz_unneeded = isend[2]; 1345 info->memory = isend[3]; 1346 info->mallocs = isend[4]; 1347 } else if (flag == MAT_GLOBAL_MAX) { 1348 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1349 info->nz_used = irecv[0]; 1350 info->nz_allocated = irecv[1]; 1351 info->nz_unneeded = irecv[2]; 1352 info->memory = irecv[3]; 1353 info->mallocs = irecv[4]; 1354 } else if (flag == MAT_GLOBAL_SUM) { 1355 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1356 info->nz_used = irecv[0]; 1357 info->nz_allocated = irecv[1]; 1358 info->nz_unneeded = irecv[2]; 1359 info->memory = irecv[3]; 1360 info->mallocs = irecv[4]; 1361 } 1362 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1363 info->fill_ratio_needed = 0; 1364 info->factor_mallocs = 0; 1365 1366 PetscFunctionReturn(0); 1367 } 1368 1369 #undef __FUNCT__ 1370 #define __FUNCT__ "MatSetOption_MPIAIJ" 1371 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg) 1372 { 1373 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1374 PetscErrorCode ierr; 1375 1376 PetscFunctionBegin; 1377 switch (op) { 1378 case MAT_NEW_NONZERO_LOCATIONS: 1379 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1380 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1381 case MAT_KEEP_ZEROED_ROWS: 1382 case MAT_NEW_NONZERO_LOCATION_ERR: 1383 case MAT_USE_INODES: 1384 case MAT_IGNORE_ZERO_ENTRIES: 1385 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1386 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1387 break; 1388 case MAT_ROW_ORIENTED: 1389 a->roworiented = flg; 1390 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1391 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1392 break; 1393 case MAT_NEW_DIAGONALS: 1394 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1395 break; 1396 case MAT_IGNORE_OFF_PROC_ENTRIES: 1397 a->donotstash = PETSC_TRUE; 1398 break; 1399 case MAT_SYMMETRIC: 1400 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1401 break; 1402 case MAT_STRUCTURALLY_SYMMETRIC: 1403 case MAT_HERMITIAN: 1404 case MAT_SYMMETRY_ETERNAL: 1405 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1406 break; 1407 default: 1408 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1409 } 1410 PetscFunctionReturn(0); 1411 } 1412 1413 #undef __FUNCT__ 1414 #define __FUNCT__ "MatGetRow_MPIAIJ" 1415 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1416 { 1417 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1418 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1419 PetscErrorCode ierr; 1420 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1421 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1422 PetscInt *cmap,*idx_p; 1423 1424 PetscFunctionBegin; 1425 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1426 mat->getrowactive = PETSC_TRUE; 1427 1428 if (!mat->rowvalues && (idx || v)) { 1429 /* 1430 allocate enough space to hold information from the longest row. 1431 */ 1432 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1433 PetscInt max = 1,tmp; 1434 for (i=0; i<matin->rmap->n; i++) { 1435 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1436 if (max < tmp) { max = tmp; } 1437 } 1438 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1439 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1440 } 1441 1442 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1443 lrow = row - rstart; 1444 1445 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1446 if (!v) {pvA = 0; pvB = 0;} 1447 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1448 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1449 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1450 nztot = nzA + nzB; 1451 1452 cmap = mat->garray; 1453 if (v || idx) { 1454 if (nztot) { 1455 /* Sort by increasing column numbers, assuming A and B already sorted */ 1456 PetscInt imark = -1; 1457 if (v) { 1458 *v = v_p = mat->rowvalues; 1459 for (i=0; i<nzB; i++) { 1460 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1461 else break; 1462 } 1463 imark = i; 1464 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1465 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1466 } 1467 if (idx) { 1468 *idx = idx_p = mat->rowindices; 1469 if (imark > -1) { 1470 for (i=0; i<imark; i++) { 1471 idx_p[i] = cmap[cworkB[i]]; 1472 } 1473 } else { 1474 for (i=0; i<nzB; i++) { 1475 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1476 else break; 1477 } 1478 imark = i; 1479 } 1480 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1481 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1482 } 1483 } else { 1484 if (idx) *idx = 0; 1485 if (v) *v = 0; 1486 } 1487 } 1488 *nz = nztot; 1489 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1490 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1491 PetscFunctionReturn(0); 1492 } 1493 1494 #undef __FUNCT__ 1495 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1496 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1497 { 1498 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1499 1500 PetscFunctionBegin; 1501 if (!aij->getrowactive) { 1502 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1503 } 1504 aij->getrowactive = PETSC_FALSE; 1505 PetscFunctionReturn(0); 1506 } 1507 1508 #undef __FUNCT__ 1509 #define __FUNCT__ "MatNorm_MPIAIJ" 1510 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1511 { 1512 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1513 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1514 PetscErrorCode ierr; 1515 PetscInt i,j,cstart = mat->cmap->rstart; 1516 PetscReal sum = 0.0; 1517 MatScalar *v; 1518 1519 PetscFunctionBegin; 1520 if (aij->size == 1) { 1521 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1522 } else { 1523 if (type == NORM_FROBENIUS) { 1524 v = amat->a; 1525 for (i=0; i<amat->nz; i++) { 1526 #if defined(PETSC_USE_COMPLEX) 1527 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1528 #else 1529 sum += (*v)*(*v); v++; 1530 #endif 1531 } 1532 v = bmat->a; 1533 for (i=0; i<bmat->nz; i++) { 1534 #if defined(PETSC_USE_COMPLEX) 1535 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1536 #else 1537 sum += (*v)*(*v); v++; 1538 #endif 1539 } 1540 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1541 *norm = sqrt(*norm); 1542 } else if (type == NORM_1) { /* max column norm */ 1543 PetscReal *tmp,*tmp2; 1544 PetscInt *jj,*garray = aij->garray; 1545 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1546 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1547 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1548 *norm = 0.0; 1549 v = amat->a; jj = amat->j; 1550 for (j=0; j<amat->nz; j++) { 1551 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1552 } 1553 v = bmat->a; jj = bmat->j; 1554 for (j=0; j<bmat->nz; j++) { 1555 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1556 } 1557 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1558 for (j=0; j<mat->cmap->N; j++) { 1559 if (tmp2[j] > *norm) *norm = tmp2[j]; 1560 } 1561 ierr = PetscFree(tmp);CHKERRQ(ierr); 1562 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1563 } else if (type == NORM_INFINITY) { /* max row norm */ 1564 PetscReal ntemp = 0.0; 1565 for (j=0; j<aij->A->rmap->n; j++) { 1566 v = amat->a + amat->i[j]; 1567 sum = 0.0; 1568 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1569 sum += PetscAbsScalar(*v); v++; 1570 } 1571 v = bmat->a + bmat->i[j]; 1572 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1573 sum += PetscAbsScalar(*v); v++; 1574 } 1575 if (sum > ntemp) ntemp = sum; 1576 } 1577 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1578 } else { 1579 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1580 } 1581 } 1582 PetscFunctionReturn(0); 1583 } 1584 1585 #undef __FUNCT__ 1586 #define __FUNCT__ "MatTranspose_MPIAIJ" 1587 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1588 { 1589 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1590 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1591 PetscErrorCode ierr; 1592 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1593 PetscInt cstart=A->cmap->rstart,ncol; 1594 Mat B; 1595 MatScalar *array; 1596 1597 PetscFunctionBegin; 1598 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1599 1600 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1601 ai = Aloc->i; aj = Aloc->j; 1602 bi = Bloc->i; bj = Bloc->j; 1603 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1604 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1605 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1606 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1607 for (i=0; i<ai[ma]; i++){ 1608 d_nnz[aj[i]] ++; 1609 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1610 } 1611 1612 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1613 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1614 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1615 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1616 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1617 } else { 1618 B = *matout; 1619 } 1620 1621 /* copy over the A part */ 1622 array = Aloc->a; 1623 row = A->rmap->rstart; 1624 for (i=0; i<ma; i++) { 1625 ncol = ai[i+1]-ai[i]; 1626 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1627 row++; array += ncol; aj += ncol; 1628 } 1629 aj = Aloc->j; 1630 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1631 1632 /* copy over the B part */ 1633 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1634 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1635 array = Bloc->a; 1636 row = A->rmap->rstart; 1637 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1638 cols_tmp = cols; 1639 for (i=0; i<mb; i++) { 1640 ncol = bi[i+1]-bi[i]; 1641 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1642 row++; array += ncol; cols_tmp += ncol; 1643 } 1644 ierr = PetscFree(cols);CHKERRQ(ierr); 1645 1646 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1647 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1648 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1649 *matout = B; 1650 } else { 1651 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1652 } 1653 PetscFunctionReturn(0); 1654 } 1655 1656 #undef __FUNCT__ 1657 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1658 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1659 { 1660 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1661 Mat a = aij->A,b = aij->B; 1662 PetscErrorCode ierr; 1663 PetscInt s1,s2,s3; 1664 1665 PetscFunctionBegin; 1666 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1667 if (rr) { 1668 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1669 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1670 /* Overlap communication with computation. */ 1671 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1672 } 1673 if (ll) { 1674 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1675 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1676 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1677 } 1678 /* scale the diagonal block */ 1679 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1680 1681 if (rr) { 1682 /* Do a scatter end and then right scale the off-diagonal block */ 1683 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1684 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1685 } 1686 1687 PetscFunctionReturn(0); 1688 } 1689 1690 #undef __FUNCT__ 1691 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1692 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1693 { 1694 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1695 PetscErrorCode ierr; 1696 1697 PetscFunctionBegin; 1698 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1699 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1700 PetscFunctionReturn(0); 1701 } 1702 #undef __FUNCT__ 1703 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1704 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1705 { 1706 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1707 PetscErrorCode ierr; 1708 1709 PetscFunctionBegin; 1710 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1711 PetscFunctionReturn(0); 1712 } 1713 1714 #undef __FUNCT__ 1715 #define __FUNCT__ "MatEqual_MPIAIJ" 1716 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1717 { 1718 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1719 Mat a,b,c,d; 1720 PetscTruth flg; 1721 PetscErrorCode ierr; 1722 1723 PetscFunctionBegin; 1724 a = matA->A; b = matA->B; 1725 c = matB->A; d = matB->B; 1726 1727 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1728 if (flg) { 1729 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1730 } 1731 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1732 PetscFunctionReturn(0); 1733 } 1734 1735 #undef __FUNCT__ 1736 #define __FUNCT__ "MatCopy_MPIAIJ" 1737 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1738 { 1739 PetscErrorCode ierr; 1740 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1741 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1742 1743 PetscFunctionBegin; 1744 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1745 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1746 /* because of the column compression in the off-processor part of the matrix a->B, 1747 the number of columns in a->B and b->B may be different, hence we cannot call 1748 the MatCopy() directly on the two parts. If need be, we can provide a more 1749 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1750 then copying the submatrices */ 1751 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1752 } else { 1753 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1754 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1755 } 1756 PetscFunctionReturn(0); 1757 } 1758 1759 #undef __FUNCT__ 1760 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1761 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1762 { 1763 PetscErrorCode ierr; 1764 1765 PetscFunctionBegin; 1766 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1767 PetscFunctionReturn(0); 1768 } 1769 1770 #include "petscblaslapack.h" 1771 #undef __FUNCT__ 1772 #define __FUNCT__ "MatAXPY_MPIAIJ" 1773 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1774 { 1775 PetscErrorCode ierr; 1776 PetscInt i; 1777 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1778 PetscBLASInt bnz,one=1; 1779 Mat_SeqAIJ *x,*y; 1780 1781 PetscFunctionBegin; 1782 if (str == SAME_NONZERO_PATTERN) { 1783 PetscScalar alpha = a; 1784 x = (Mat_SeqAIJ *)xx->A->data; 1785 y = (Mat_SeqAIJ *)yy->A->data; 1786 bnz = PetscBLASIntCast(x->nz); 1787 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1788 x = (Mat_SeqAIJ *)xx->B->data; 1789 y = (Mat_SeqAIJ *)yy->B->data; 1790 bnz = PetscBLASIntCast(x->nz); 1791 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1792 } else if (str == SUBSET_NONZERO_PATTERN) { 1793 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1794 1795 x = (Mat_SeqAIJ *)xx->B->data; 1796 y = (Mat_SeqAIJ *)yy->B->data; 1797 if (y->xtoy && y->XtoY != xx->B) { 1798 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1799 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1800 } 1801 if (!y->xtoy) { /* get xtoy */ 1802 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1803 y->XtoY = xx->B; 1804 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1805 } 1806 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1807 } else { 1808 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1809 } 1810 PetscFunctionReturn(0); 1811 } 1812 1813 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1814 1815 #undef __FUNCT__ 1816 #define __FUNCT__ "MatConjugate_MPIAIJ" 1817 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1818 { 1819 #if defined(PETSC_USE_COMPLEX) 1820 PetscErrorCode ierr; 1821 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1822 1823 PetscFunctionBegin; 1824 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1825 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1826 #else 1827 PetscFunctionBegin; 1828 #endif 1829 PetscFunctionReturn(0); 1830 } 1831 1832 #undef __FUNCT__ 1833 #define __FUNCT__ "MatRealPart_MPIAIJ" 1834 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1835 { 1836 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1837 PetscErrorCode ierr; 1838 1839 PetscFunctionBegin; 1840 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1841 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1842 PetscFunctionReturn(0); 1843 } 1844 1845 #undef __FUNCT__ 1846 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1847 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1848 { 1849 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1850 PetscErrorCode ierr; 1851 1852 PetscFunctionBegin; 1853 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1854 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1855 PetscFunctionReturn(0); 1856 } 1857 1858 #ifdef PETSC_HAVE_PBGL 1859 1860 #include <boost/parallel/mpi/bsp_process_group.hpp> 1861 #include <boost/graph/distributed/ilu_default_graph.hpp> 1862 #include <boost/graph/distributed/ilu_0_block.hpp> 1863 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1864 #include <boost/graph/distributed/petsc/interface.hpp> 1865 #include <boost/multi_array.hpp> 1866 #include <boost/parallel/distributed_property_map->hpp> 1867 1868 #undef __FUNCT__ 1869 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1870 /* 1871 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1872 */ 1873 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1874 { 1875 namespace petsc = boost::distributed::petsc; 1876 1877 namespace graph_dist = boost::graph::distributed; 1878 using boost::graph::distributed::ilu_default::process_group_type; 1879 using boost::graph::ilu_permuted; 1880 1881 PetscTruth row_identity, col_identity; 1882 PetscContainer c; 1883 PetscInt m, n, M, N; 1884 PetscErrorCode ierr; 1885 1886 PetscFunctionBegin; 1887 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1888 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1889 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1890 if (!row_identity || !col_identity) { 1891 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1892 } 1893 1894 process_group_type pg; 1895 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1896 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1897 lgraph_type& level_graph = *lgraph_p; 1898 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1899 1900 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1901 ilu_permuted(level_graph); 1902 1903 /* put together the new matrix */ 1904 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 1905 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1906 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1907 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 1908 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 1909 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1910 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1911 1912 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 1913 ierr = PetscContainerSetPointer(c, lgraph_p); 1914 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 1915 PetscFunctionReturn(0); 1916 } 1917 1918 #undef __FUNCT__ 1919 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1920 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 1921 { 1922 PetscFunctionBegin; 1923 PetscFunctionReturn(0); 1924 } 1925 1926 #undef __FUNCT__ 1927 #define __FUNCT__ "MatSolve_MPIAIJ" 1928 /* 1929 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1930 */ 1931 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1932 { 1933 namespace graph_dist = boost::graph::distributed; 1934 1935 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1936 lgraph_type* lgraph_p; 1937 PetscContainer c; 1938 PetscErrorCode ierr; 1939 1940 PetscFunctionBegin; 1941 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1942 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1943 ierr = VecCopy(b, x); CHKERRQ(ierr); 1944 1945 PetscScalar* array_x; 1946 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1947 PetscInt sx; 1948 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 1949 1950 PetscScalar* array_b; 1951 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 1952 PetscInt sb; 1953 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 1954 1955 lgraph_type& level_graph = *lgraph_p; 1956 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1957 1958 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 1959 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 1960 ref_x(array_x, boost::extents[num_vertices(graph)]); 1961 1962 typedef boost::iterator_property_map<array_ref_type::iterator, 1963 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 1964 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 1965 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 1966 1967 ilu_set_solve(*lgraph_p, vector_b, vector_x); 1968 1969 PetscFunctionReturn(0); 1970 } 1971 #endif 1972 1973 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 1974 PetscInt nzlocal,nsends,nrecvs; 1975 PetscMPIInt *send_rank; 1976 PetscInt *sbuf_nz,*sbuf_j,**rbuf_j; 1977 PetscScalar *sbuf_a,**rbuf_a; 1978 PetscErrorCode (*MatDestroy)(Mat); 1979 } Mat_Redundant; 1980 1981 #undef __FUNCT__ 1982 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 1983 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 1984 { 1985 PetscErrorCode ierr; 1986 Mat_Redundant *redund=(Mat_Redundant*)ptr; 1987 PetscInt i; 1988 1989 PetscFunctionBegin; 1990 ierr = PetscFree(redund->send_rank);CHKERRQ(ierr); 1991 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 1992 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 1993 for (i=0; i<redund->nrecvs; i++){ 1994 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 1995 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 1996 } 1997 ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 1998 ierr = PetscFree(redund);CHKERRQ(ierr); 1999 PetscFunctionReturn(0); 2000 } 2001 2002 #undef __FUNCT__ 2003 #define __FUNCT__ "MatDestroy_MatRedundant" 2004 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2005 { 2006 PetscErrorCode ierr; 2007 PetscContainer container; 2008 Mat_Redundant *redund=PETSC_NULL; 2009 2010 PetscFunctionBegin; 2011 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2012 if (container) { 2013 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2014 } else { 2015 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2016 } 2017 A->ops->destroy = redund->MatDestroy; 2018 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2019 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2020 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 2021 PetscFunctionReturn(0); 2022 } 2023 2024 #undef __FUNCT__ 2025 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2026 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2027 { 2028 PetscMPIInt rank,size; 2029 MPI_Comm comm=((PetscObject)mat)->comm; 2030 PetscErrorCode ierr; 2031 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2032 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2033 PetscInt *rowrange=mat->rmap->range; 2034 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2035 Mat A=aij->A,B=aij->B,C=*matredundant; 2036 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2037 PetscScalar *sbuf_a; 2038 PetscInt nzlocal=a->nz+b->nz; 2039 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2040 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2041 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2042 MatScalar *aworkA,*aworkB; 2043 PetscScalar *vals; 2044 PetscMPIInt tag1,tag2,tag3,imdex; 2045 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2046 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2047 MPI_Status recv_status,*send_status; 2048 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2049 PetscInt **rbuf_j=PETSC_NULL; 2050 PetscScalar **rbuf_a=PETSC_NULL; 2051 Mat_Redundant *redund=PETSC_NULL; 2052 PetscContainer container; 2053 2054 PetscFunctionBegin; 2055 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2056 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2057 2058 if (reuse == MAT_REUSE_MATRIX) { 2059 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2060 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2061 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2062 if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2063 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2064 if (container) { 2065 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2066 } else { 2067 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2068 } 2069 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2070 2071 nsends = redund->nsends; 2072 nrecvs = redund->nrecvs; 2073 send_rank = redund->send_rank; recv_rank = send_rank + size; 2074 sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends; 2075 sbuf_j = redund->sbuf_j; 2076 sbuf_a = redund->sbuf_a; 2077 rbuf_j = redund->rbuf_j; 2078 rbuf_a = redund->rbuf_a; 2079 } 2080 2081 if (reuse == MAT_INITIAL_MATRIX){ 2082 PetscMPIInt subrank,subsize; 2083 PetscInt nleftover,np_subcomm; 2084 /* get the destination processors' id send_rank, nsends and nrecvs */ 2085 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2086 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2087 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 2088 recv_rank = send_rank + size; 2089 np_subcomm = size/nsubcomm; 2090 nleftover = size - nsubcomm*np_subcomm; 2091 nsends = 0; nrecvs = 0; 2092 for (i=0; i<size; i++){ /* i=rank*/ 2093 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2094 send_rank[nsends] = i; nsends++; 2095 recv_rank[nrecvs++] = i; 2096 } 2097 } 2098 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2099 i = size-nleftover-1; 2100 j = 0; 2101 while (j < nsubcomm - nleftover){ 2102 send_rank[nsends++] = i; 2103 i--; j++; 2104 } 2105 } 2106 2107 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2108 for (i=0; i<nleftover; i++){ 2109 recv_rank[nrecvs++] = size-nleftover+i; 2110 } 2111 } 2112 2113 /* allocate sbuf_j, sbuf_a */ 2114 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2115 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2116 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2117 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2118 2119 /* copy mat's local entries into the buffers */ 2120 if (reuse == MAT_INITIAL_MATRIX){ 2121 rownz_max = 0; 2122 rptr = sbuf_j; 2123 cols = sbuf_j + rend-rstart + 1; 2124 vals = sbuf_a; 2125 rptr[0] = 0; 2126 for (i=0; i<rend-rstart; i++){ 2127 row = i + rstart; 2128 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2129 ncols = nzA + nzB; 2130 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2131 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2132 /* load the column indices for this row into cols */ 2133 lwrite = 0; 2134 for (l=0; l<nzB; l++) { 2135 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2136 vals[lwrite] = aworkB[l]; 2137 cols[lwrite++] = ctmp; 2138 } 2139 } 2140 for (l=0; l<nzA; l++){ 2141 vals[lwrite] = aworkA[l]; 2142 cols[lwrite++] = cstart + cworkA[l]; 2143 } 2144 for (l=0; l<nzB; l++) { 2145 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2146 vals[lwrite] = aworkB[l]; 2147 cols[lwrite++] = ctmp; 2148 } 2149 } 2150 vals += ncols; 2151 cols += ncols; 2152 rptr[i+1] = rptr[i] + ncols; 2153 if (rownz_max < ncols) rownz_max = ncols; 2154 } 2155 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2156 } else { /* only copy matrix values into sbuf_a */ 2157 rptr = sbuf_j; 2158 vals = sbuf_a; 2159 rptr[0] = 0; 2160 for (i=0; i<rend-rstart; i++){ 2161 row = i + rstart; 2162 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2163 ncols = nzA + nzB; 2164 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2165 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2166 lwrite = 0; 2167 for (l=0; l<nzB; l++) { 2168 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2169 } 2170 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2171 for (l=0; l<nzB; l++) { 2172 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2173 } 2174 vals += ncols; 2175 rptr[i+1] = rptr[i] + ncols; 2176 } 2177 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2178 2179 /* send nzlocal to others, and recv other's nzlocal */ 2180 /*--------------------------------------------------*/ 2181 if (reuse == MAT_INITIAL_MATRIX){ 2182 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2183 s_waits2 = s_waits3 + nsends; 2184 s_waits1 = s_waits2 + nsends; 2185 r_waits1 = s_waits1 + nsends; 2186 r_waits2 = r_waits1 + nrecvs; 2187 r_waits3 = r_waits2 + nrecvs; 2188 } else { 2189 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2190 r_waits3 = s_waits3 + nsends; 2191 } 2192 2193 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2194 if (reuse == MAT_INITIAL_MATRIX){ 2195 /* get new tags to keep the communication clean */ 2196 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2197 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2198 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2199 rbuf_nz = sbuf_nz + nsends; 2200 2201 /* post receives of other's nzlocal */ 2202 for (i=0; i<nrecvs; i++){ 2203 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2204 } 2205 /* send nzlocal to others */ 2206 for (i=0; i<nsends; i++){ 2207 sbuf_nz[i] = nzlocal; 2208 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2209 } 2210 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2211 count = nrecvs; 2212 while (count) { 2213 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2214 recv_rank[imdex] = recv_status.MPI_SOURCE; 2215 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2216 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2217 2218 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2219 rbuf_nz[imdex] += i + 2; 2220 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2221 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2222 count--; 2223 } 2224 /* wait on sends of nzlocal */ 2225 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2226 /* send mat->i,j to others, and recv from other's */ 2227 /*------------------------------------------------*/ 2228 for (i=0; i<nsends; i++){ 2229 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2230 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2231 } 2232 /* wait on receives of mat->i,j */ 2233 /*------------------------------*/ 2234 count = nrecvs; 2235 while (count) { 2236 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2237 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2238 count--; 2239 } 2240 /* wait on sends of mat->i,j */ 2241 /*---------------------------*/ 2242 if (nsends) { 2243 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2244 } 2245 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2246 2247 /* post receives, send and receive mat->a */ 2248 /*----------------------------------------*/ 2249 for (imdex=0; imdex<nrecvs; imdex++) { 2250 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2251 } 2252 for (i=0; i<nsends; i++){ 2253 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2254 } 2255 count = nrecvs; 2256 while (count) { 2257 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2258 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2259 count--; 2260 } 2261 if (nsends) { 2262 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2263 } 2264 2265 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2266 2267 /* create redundant matrix */ 2268 /*-------------------------*/ 2269 if (reuse == MAT_INITIAL_MATRIX){ 2270 /* compute rownz_max for preallocation */ 2271 for (imdex=0; imdex<nrecvs; imdex++){ 2272 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2273 rptr = rbuf_j[imdex]; 2274 for (i=0; i<j; i++){ 2275 ncols = rptr[i+1] - rptr[i]; 2276 if (rownz_max < ncols) rownz_max = ncols; 2277 } 2278 } 2279 2280 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2281 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2282 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2283 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2284 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2285 } else { 2286 C = *matredundant; 2287 } 2288 2289 /* insert local matrix entries */ 2290 rptr = sbuf_j; 2291 cols = sbuf_j + rend-rstart + 1; 2292 vals = sbuf_a; 2293 for (i=0; i<rend-rstart; i++){ 2294 row = i + rstart; 2295 ncols = rptr[i+1] - rptr[i]; 2296 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2297 vals += ncols; 2298 cols += ncols; 2299 } 2300 /* insert received matrix entries */ 2301 for (imdex=0; imdex<nrecvs; imdex++){ 2302 rstart = rowrange[recv_rank[imdex]]; 2303 rend = rowrange[recv_rank[imdex]+1]; 2304 rptr = rbuf_j[imdex]; 2305 cols = rbuf_j[imdex] + rend-rstart + 1; 2306 vals = rbuf_a[imdex]; 2307 for (i=0; i<rend-rstart; i++){ 2308 row = i + rstart; 2309 ncols = rptr[i+1] - rptr[i]; 2310 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2311 vals += ncols; 2312 cols += ncols; 2313 } 2314 } 2315 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2316 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2317 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2318 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2319 if (reuse == MAT_INITIAL_MATRIX){ 2320 PetscContainer container; 2321 *matredundant = C; 2322 /* create a supporting struct and attach it to C for reuse */ 2323 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2324 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2325 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2326 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2327 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2328 2329 redund->nzlocal = nzlocal; 2330 redund->nsends = nsends; 2331 redund->nrecvs = nrecvs; 2332 redund->send_rank = send_rank; 2333 redund->sbuf_nz = sbuf_nz; 2334 redund->sbuf_j = sbuf_j; 2335 redund->sbuf_a = sbuf_a; 2336 redund->rbuf_j = rbuf_j; 2337 redund->rbuf_a = rbuf_a; 2338 2339 redund->MatDestroy = C->ops->destroy; 2340 C->ops->destroy = MatDestroy_MatRedundant; 2341 } 2342 PetscFunctionReturn(0); 2343 } 2344 2345 #undef __FUNCT__ 2346 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2347 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2348 { 2349 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2350 PetscErrorCode ierr; 2351 PetscInt i,*idxb = 0; 2352 PetscScalar *va,*vb; 2353 Vec vtmp; 2354 2355 PetscFunctionBegin; 2356 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2357 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2358 if (idx) { 2359 for (i=0; i<A->rmap->n; i++) { 2360 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2361 } 2362 } 2363 2364 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2365 if (idx) { 2366 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2367 } 2368 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2369 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2370 2371 for (i=0; i<A->rmap->n; i++){ 2372 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2373 va[i] = vb[i]; 2374 if (idx) idx[i] = a->garray[idxb[i]]; 2375 } 2376 } 2377 2378 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2379 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2380 if (idxb) { 2381 ierr = PetscFree(idxb);CHKERRQ(ierr); 2382 } 2383 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2384 PetscFunctionReturn(0); 2385 } 2386 2387 #undef __FUNCT__ 2388 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2389 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2390 { 2391 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2392 PetscErrorCode ierr; 2393 PetscInt i,*idxb = 0; 2394 PetscScalar *va,*vb; 2395 Vec vtmp; 2396 2397 PetscFunctionBegin; 2398 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2399 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2400 if (idx) { 2401 for (i=0; i<A->cmap->n; i++) { 2402 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2403 } 2404 } 2405 2406 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2407 if (idx) { 2408 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2409 } 2410 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2411 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2412 2413 for (i=0; i<A->rmap->n; i++){ 2414 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2415 va[i] = vb[i]; 2416 if (idx) idx[i] = a->garray[idxb[i]]; 2417 } 2418 } 2419 2420 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2421 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2422 if (idxb) { 2423 ierr = PetscFree(idxb);CHKERRQ(ierr); 2424 } 2425 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2426 PetscFunctionReturn(0); 2427 } 2428 2429 #undef __FUNCT__ 2430 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2431 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2432 { 2433 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2434 PetscInt n = A->rmap->n; 2435 PetscInt cstart = A->cmap->rstart; 2436 PetscInt *cmap = mat->garray; 2437 PetscInt *diagIdx, *offdiagIdx; 2438 Vec diagV, offdiagV; 2439 PetscScalar *a, *diagA, *offdiagA; 2440 PetscInt r; 2441 PetscErrorCode ierr; 2442 2443 PetscFunctionBegin; 2444 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2445 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2446 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2447 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2448 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2449 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2450 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2451 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2452 for(r = 0; r < n; ++r) { 2453 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2454 a[r] = diagA[r]; 2455 idx[r] = cstart + diagIdx[r]; 2456 } else { 2457 a[r] = offdiagA[r]; 2458 idx[r] = cmap[offdiagIdx[r]]; 2459 } 2460 } 2461 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2462 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2463 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2464 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2465 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2466 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2467 PetscFunctionReturn(0); 2468 } 2469 2470 #undef __FUNCT__ 2471 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2472 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2473 { 2474 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2475 PetscInt n = A->rmap->n; 2476 PetscInt cstart = A->cmap->rstart; 2477 PetscInt *cmap = mat->garray; 2478 PetscInt *diagIdx, *offdiagIdx; 2479 Vec diagV, offdiagV; 2480 PetscScalar *a, *diagA, *offdiagA; 2481 PetscInt r; 2482 PetscErrorCode ierr; 2483 2484 PetscFunctionBegin; 2485 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2486 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2487 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2488 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2489 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2490 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2491 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2492 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2493 for(r = 0; r < n; ++r) { 2494 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2495 a[r] = diagA[r]; 2496 idx[r] = cstart + diagIdx[r]; 2497 } else { 2498 a[r] = offdiagA[r]; 2499 idx[r] = cmap[offdiagIdx[r]]; 2500 } 2501 } 2502 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2503 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2504 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2505 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2506 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2507 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2508 PetscFunctionReturn(0); 2509 } 2510 2511 #undef __FUNCT__ 2512 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2513 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[]) 2514 { 2515 PetscErrorCode ierr; 2516 2517 PetscFunctionBegin; 2518 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 2519 PetscFunctionReturn(0); 2520 } 2521 2522 /* -------------------------------------------------------------------*/ 2523 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2524 MatGetRow_MPIAIJ, 2525 MatRestoreRow_MPIAIJ, 2526 MatMult_MPIAIJ, 2527 /* 4*/ MatMultAdd_MPIAIJ, 2528 MatMultTranspose_MPIAIJ, 2529 MatMultTransposeAdd_MPIAIJ, 2530 #ifdef PETSC_HAVE_PBGL 2531 MatSolve_MPIAIJ, 2532 #else 2533 0, 2534 #endif 2535 0, 2536 0, 2537 /*10*/ 0, 2538 0, 2539 0, 2540 MatRelax_MPIAIJ, 2541 MatTranspose_MPIAIJ, 2542 /*15*/ MatGetInfo_MPIAIJ, 2543 MatEqual_MPIAIJ, 2544 MatGetDiagonal_MPIAIJ, 2545 MatDiagonalScale_MPIAIJ, 2546 MatNorm_MPIAIJ, 2547 /*20*/ MatAssemblyBegin_MPIAIJ, 2548 MatAssemblyEnd_MPIAIJ, 2549 0, 2550 MatSetOption_MPIAIJ, 2551 MatZeroEntries_MPIAIJ, 2552 /*25*/ MatZeroRows_MPIAIJ, 2553 0, 2554 #ifdef PETSC_HAVE_PBGL 2555 0, 2556 #else 2557 0, 2558 #endif 2559 0, 2560 0, 2561 /*30*/ MatSetUpPreallocation_MPIAIJ, 2562 #ifdef PETSC_HAVE_PBGL 2563 0, 2564 #else 2565 0, 2566 #endif 2567 0, 2568 0, 2569 0, 2570 /*35*/ MatDuplicate_MPIAIJ, 2571 0, 2572 0, 2573 0, 2574 0, 2575 /*40*/ MatAXPY_MPIAIJ, 2576 MatGetSubMatrices_MPIAIJ, 2577 MatIncreaseOverlap_MPIAIJ, 2578 MatGetValues_MPIAIJ, 2579 MatCopy_MPIAIJ, 2580 /*45*/ MatGetRowMax_MPIAIJ, 2581 MatScale_MPIAIJ, 2582 0, 2583 0, 2584 0, 2585 /*50*/ MatSetBlockSize_MPIAIJ, 2586 0, 2587 0, 2588 0, 2589 0, 2590 /*55*/ MatFDColoringCreate_MPIAIJ, 2591 0, 2592 MatSetUnfactored_MPIAIJ, 2593 MatPermute_MPIAIJ, 2594 0, 2595 /*60*/ MatGetSubMatrix_MPIAIJ, 2596 MatDestroy_MPIAIJ, 2597 MatView_MPIAIJ, 2598 0, 2599 0, 2600 /*65*/ 0, 2601 0, 2602 0, 2603 0, 2604 0, 2605 /*70*/ MatGetRowMaxAbs_MPIAIJ, 2606 MatGetRowMinAbs_MPIAIJ, 2607 0, 2608 MatSetColoring_MPIAIJ, 2609 #if defined(PETSC_HAVE_ADIC) 2610 MatSetValuesAdic_MPIAIJ, 2611 #else 2612 0, 2613 #endif 2614 MatSetValuesAdifor_MPIAIJ, 2615 /*75*/ 0, 2616 0, 2617 0, 2618 0, 2619 0, 2620 /*80*/ 0, 2621 0, 2622 0, 2623 /*84*/ MatLoad_MPIAIJ, 2624 0, 2625 0, 2626 0, 2627 0, 2628 0, 2629 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 2630 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2631 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2632 MatPtAP_Basic, 2633 MatPtAPSymbolic_MPIAIJ, 2634 /*95*/ MatPtAPNumeric_MPIAIJ, 2635 0, 2636 0, 2637 0, 2638 0, 2639 /*100*/0, 2640 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2641 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2642 MatConjugate_MPIAIJ, 2643 0, 2644 /*105*/MatSetValuesRow_MPIAIJ, 2645 MatRealPart_MPIAIJ, 2646 MatImaginaryPart_MPIAIJ, 2647 0, 2648 0, 2649 /*110*/0, 2650 MatGetRedundantMatrix_MPIAIJ, 2651 MatGetRowMin_MPIAIJ, 2652 0, 2653 0, 2654 /*115*/MatGetSeqNonzerostructure_MPIAIJ}; 2655 2656 /* ----------------------------------------------------------------------------------------*/ 2657 2658 EXTERN_C_BEGIN 2659 #undef __FUNCT__ 2660 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2661 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2662 { 2663 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2664 PetscErrorCode ierr; 2665 2666 PetscFunctionBegin; 2667 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2668 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2669 PetscFunctionReturn(0); 2670 } 2671 EXTERN_C_END 2672 2673 EXTERN_C_BEGIN 2674 #undef __FUNCT__ 2675 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2676 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2677 { 2678 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2679 PetscErrorCode ierr; 2680 2681 PetscFunctionBegin; 2682 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2683 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2684 PetscFunctionReturn(0); 2685 } 2686 EXTERN_C_END 2687 2688 #include "petscpc.h" 2689 EXTERN_C_BEGIN 2690 #undef __FUNCT__ 2691 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2692 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2693 { 2694 Mat_MPIAIJ *b; 2695 PetscErrorCode ierr; 2696 PetscInt i; 2697 2698 PetscFunctionBegin; 2699 B->preallocated = PETSC_TRUE; 2700 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2701 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2702 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2703 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2704 2705 ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr); 2706 ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr); 2707 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 2708 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 2709 if (d_nnz) { 2710 for (i=0; i<B->rmap->n; i++) { 2711 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2712 } 2713 } 2714 if (o_nnz) { 2715 for (i=0; i<B->rmap->n; i++) { 2716 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2717 } 2718 } 2719 b = (Mat_MPIAIJ*)B->data; 2720 2721 /* Explicitly create 2 MATSEQAIJ matrices. */ 2722 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2723 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2724 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2725 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2726 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2727 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2728 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2729 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2730 2731 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2732 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2733 2734 PetscFunctionReturn(0); 2735 } 2736 EXTERN_C_END 2737 2738 #undef __FUNCT__ 2739 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2740 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2741 { 2742 Mat mat; 2743 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2744 PetscErrorCode ierr; 2745 2746 PetscFunctionBegin; 2747 *newmat = 0; 2748 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2749 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2750 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2751 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2752 a = (Mat_MPIAIJ*)mat->data; 2753 2754 mat->factor = matin->factor; 2755 mat->rmap->bs = matin->rmap->bs; 2756 mat->assembled = PETSC_TRUE; 2757 mat->insertmode = NOT_SET_VALUES; 2758 mat->preallocated = PETSC_TRUE; 2759 2760 a->size = oldmat->size; 2761 a->rank = oldmat->rank; 2762 a->donotstash = oldmat->donotstash; 2763 a->roworiented = oldmat->roworiented; 2764 a->rowindices = 0; 2765 a->rowvalues = 0; 2766 a->getrowactive = PETSC_FALSE; 2767 2768 ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr); 2769 ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr); 2770 2771 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr); 2772 if (oldmat->colmap) { 2773 #if defined (PETSC_USE_CTABLE) 2774 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2775 #else 2776 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2777 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2778 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2779 #endif 2780 } else a->colmap = 0; 2781 if (oldmat->garray) { 2782 PetscInt len; 2783 len = oldmat->B->cmap->n; 2784 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2785 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2786 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2787 } else a->garray = 0; 2788 2789 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2790 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2791 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2792 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2793 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2794 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2795 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2796 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2797 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2798 *newmat = mat; 2799 PetscFunctionReturn(0); 2800 } 2801 2802 #include "petscsys.h" 2803 2804 #undef __FUNCT__ 2805 #define __FUNCT__ "MatLoad_MPIAIJ" 2806 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 2807 { 2808 Mat A; 2809 PetscScalar *vals,*svals; 2810 MPI_Comm comm = ((PetscObject)viewer)->comm; 2811 MPI_Status status; 2812 PetscErrorCode ierr; 2813 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 2814 PetscInt i,nz,j,rstart,rend,mmax; 2815 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2816 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2817 PetscInt cend,cstart,n,*rowners; 2818 int fd; 2819 2820 PetscFunctionBegin; 2821 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2822 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2823 if (!rank) { 2824 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2825 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2826 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2827 } 2828 2829 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2830 M = header[1]; N = header[2]; 2831 /* determine ownership of all rows */ 2832 m = M/size + ((M % size) > rank); 2833 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2834 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2835 2836 /* First process needs enough room for process with most rows */ 2837 if (!rank) { 2838 mmax = rowners[1]; 2839 for (i=2; i<size; i++) { 2840 mmax = PetscMax(mmax,rowners[i]); 2841 } 2842 } else mmax = m; 2843 2844 rowners[0] = 0; 2845 for (i=2; i<=size; i++) { 2846 rowners[i] += rowners[i-1]; 2847 } 2848 rstart = rowners[rank]; 2849 rend = rowners[rank+1]; 2850 2851 /* distribute row lengths to all processors */ 2852 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2853 if (!rank) { 2854 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2855 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2856 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2857 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2858 for (j=0; j<m; j++) { 2859 procsnz[0] += ourlens[j]; 2860 } 2861 for (i=1; i<size; i++) { 2862 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2863 /* calculate the number of nonzeros on each processor */ 2864 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2865 procsnz[i] += rowlengths[j]; 2866 } 2867 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2868 } 2869 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2870 } else { 2871 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2872 } 2873 2874 if (!rank) { 2875 /* determine max buffer needed and allocate it */ 2876 maxnz = 0; 2877 for (i=0; i<size; i++) { 2878 maxnz = PetscMax(maxnz,procsnz[i]); 2879 } 2880 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2881 2882 /* read in my part of the matrix column indices */ 2883 nz = procsnz[0]; 2884 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2885 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2886 2887 /* read in every one elses and ship off */ 2888 for (i=1; i<size; i++) { 2889 nz = procsnz[i]; 2890 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2891 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2892 } 2893 ierr = PetscFree(cols);CHKERRQ(ierr); 2894 } else { 2895 /* determine buffer space needed for message */ 2896 nz = 0; 2897 for (i=0; i<m; i++) { 2898 nz += ourlens[i]; 2899 } 2900 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2901 2902 /* receive message of column indices*/ 2903 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2904 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2905 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2906 } 2907 2908 /* determine column ownership if matrix is not square */ 2909 if (N != M) { 2910 n = N/size + ((N % size) > rank); 2911 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2912 cstart = cend - n; 2913 } else { 2914 cstart = rstart; 2915 cend = rend; 2916 n = cend - cstart; 2917 } 2918 2919 /* loop over local rows, determining number of off diagonal entries */ 2920 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2921 jj = 0; 2922 for (i=0; i<m; i++) { 2923 for (j=0; j<ourlens[i]; j++) { 2924 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2925 jj++; 2926 } 2927 } 2928 2929 /* create our matrix */ 2930 for (i=0; i<m; i++) { 2931 ourlens[i] -= offlens[i]; 2932 } 2933 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2934 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2935 ierr = MatSetType(A,type);CHKERRQ(ierr); 2936 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2937 2938 for (i=0; i<m; i++) { 2939 ourlens[i] += offlens[i]; 2940 } 2941 2942 if (!rank) { 2943 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2944 2945 /* read in my part of the matrix numerical values */ 2946 nz = procsnz[0]; 2947 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2948 2949 /* insert into matrix */ 2950 jj = rstart; 2951 smycols = mycols; 2952 svals = vals; 2953 for (i=0; i<m; i++) { 2954 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2955 smycols += ourlens[i]; 2956 svals += ourlens[i]; 2957 jj++; 2958 } 2959 2960 /* read in other processors and ship out */ 2961 for (i=1; i<size; i++) { 2962 nz = procsnz[i]; 2963 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2964 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 2965 } 2966 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2967 } else { 2968 /* receive numeric values */ 2969 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2970 2971 /* receive message of values*/ 2972 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 2973 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2974 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2975 2976 /* insert into matrix */ 2977 jj = rstart; 2978 smycols = mycols; 2979 svals = vals; 2980 for (i=0; i<m; i++) { 2981 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2982 smycols += ourlens[i]; 2983 svals += ourlens[i]; 2984 jj++; 2985 } 2986 } 2987 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2988 ierr = PetscFree(vals);CHKERRQ(ierr); 2989 ierr = PetscFree(mycols);CHKERRQ(ierr); 2990 ierr = PetscFree(rowners);CHKERRQ(ierr); 2991 2992 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2993 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2994 *newmat = A; 2995 PetscFunctionReturn(0); 2996 } 2997 2998 #undef __FUNCT__ 2999 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3000 /* 3001 Not great since it makes two copies of the submatrix, first an SeqAIJ 3002 in local and then by concatenating the local matrices the end result. 3003 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3004 */ 3005 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3006 { 3007 PetscErrorCode ierr; 3008 PetscMPIInt rank,size; 3009 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3010 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3011 Mat *local,M,Mreuse; 3012 MatScalar *vwork,*aa; 3013 MPI_Comm comm = ((PetscObject)mat)->comm; 3014 Mat_SeqAIJ *aij; 3015 3016 3017 PetscFunctionBegin; 3018 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3019 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3020 3021 if (call == MAT_REUSE_MATRIX) { 3022 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3023 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3024 local = &Mreuse; 3025 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3026 } else { 3027 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3028 Mreuse = *local; 3029 ierr = PetscFree(local);CHKERRQ(ierr); 3030 } 3031 3032 /* 3033 m - number of local rows 3034 n - number of columns (same on all processors) 3035 rstart - first row in new global matrix generated 3036 */ 3037 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3038 if (call == MAT_INITIAL_MATRIX) { 3039 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3040 ii = aij->i; 3041 jj = aij->j; 3042 3043 /* 3044 Determine the number of non-zeros in the diagonal and off-diagonal 3045 portions of the matrix in order to do correct preallocation 3046 */ 3047 3048 /* first get start and end of "diagonal" columns */ 3049 if (csize == PETSC_DECIDE) { 3050 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3051 if (mglobal == n) { /* square matrix */ 3052 nlocal = m; 3053 } else { 3054 nlocal = n/size + ((n % size) > rank); 3055 } 3056 } else { 3057 nlocal = csize; 3058 } 3059 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3060 rstart = rend - nlocal; 3061 if (rank == size - 1 && rend != n) { 3062 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3063 } 3064 3065 /* next, compute all the lengths */ 3066 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3067 olens = dlens + m; 3068 for (i=0; i<m; i++) { 3069 jend = ii[i+1] - ii[i]; 3070 olen = 0; 3071 dlen = 0; 3072 for (j=0; j<jend; j++) { 3073 if (*jj < rstart || *jj >= rend) olen++; 3074 else dlen++; 3075 jj++; 3076 } 3077 olens[i] = olen; 3078 dlens[i] = dlen; 3079 } 3080 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3081 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3082 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3083 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3084 ierr = PetscFree(dlens);CHKERRQ(ierr); 3085 } else { 3086 PetscInt ml,nl; 3087 3088 M = *newmat; 3089 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3090 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3091 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3092 /* 3093 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3094 rather than the slower MatSetValues(). 3095 */ 3096 M->was_assembled = PETSC_TRUE; 3097 M->assembled = PETSC_FALSE; 3098 } 3099 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3100 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3101 ii = aij->i; 3102 jj = aij->j; 3103 aa = aij->a; 3104 for (i=0; i<m; i++) { 3105 row = rstart + i; 3106 nz = ii[i+1] - ii[i]; 3107 cwork = jj; jj += nz; 3108 vwork = aa; aa += nz; 3109 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3110 } 3111 3112 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3113 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3114 *newmat = M; 3115 3116 /* save submatrix used in processor for next request */ 3117 if (call == MAT_INITIAL_MATRIX) { 3118 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3119 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 3120 } 3121 3122 PetscFunctionReturn(0); 3123 } 3124 3125 EXTERN_C_BEGIN 3126 #undef __FUNCT__ 3127 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3128 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3129 { 3130 PetscInt m,cstart, cend,j,nnz,i,d; 3131 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3132 const PetscInt *JJ; 3133 PetscScalar *values; 3134 PetscErrorCode ierr; 3135 3136 PetscFunctionBegin; 3137 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3138 3139 ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3140 ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3141 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 3142 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 3143 m = B->rmap->n; 3144 cstart = B->cmap->rstart; 3145 cend = B->cmap->rend; 3146 rstart = B->rmap->rstart; 3147 3148 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 3149 o_nnz = d_nnz + m; 3150 3151 #if defined(PETSC_USE_DEBUGGING) 3152 for (i=0; i<m; i++) { 3153 nnz = Ii[i+1]- Ii[i]; 3154 JJ = J + Ii[i]; 3155 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3156 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3157 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3158 for (j=1; j<nnz; j++) { 3159 if (JJ[i] <= JJ[i-1]) SETERRRQ(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at %D location in column indices",i,j); 3160 } 3161 } 3162 #endif 3163 3164 for (i=0; i<m; i++) { 3165 nnz = Ii[i+1]- Ii[i]; 3166 JJ = J + Ii[i]; 3167 nnz_max = PetscMax(nnz_max,nnz); 3168 for (j=0; j<nnz; j++) { 3169 if (*JJ >= cstart) break; 3170 JJ++; 3171 } 3172 d = 0; 3173 for (; j<nnz; j++) { 3174 if (*JJ++ >= cend) break; 3175 d++; 3176 } 3177 d_nnz[i] = d; 3178 o_nnz[i] = nnz - d; 3179 } 3180 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3181 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 3182 3183 if (v) values = (PetscScalar*)v; 3184 else { 3185 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3186 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3187 } 3188 3189 for (i=0; i<m; i++) { 3190 ii = i + rstart; 3191 nnz = Ii[i+1]- Ii[i]; 3192 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3193 } 3194 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3195 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3196 3197 if (!v) { 3198 ierr = PetscFree(values);CHKERRQ(ierr); 3199 } 3200 PetscFunctionReturn(0); 3201 } 3202 EXTERN_C_END 3203 3204 #undef __FUNCT__ 3205 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3206 /*@ 3207 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3208 (the default parallel PETSc format). 3209 3210 Collective on MPI_Comm 3211 3212 Input Parameters: 3213 + B - the matrix 3214 . i - the indices into j for the start of each local row (starts with zero) 3215 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3216 - v - optional values in the matrix 3217 3218 Level: developer 3219 3220 Notes: 3221 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3222 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3223 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3224 3225 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3226 3227 The format which is used for the sparse matrix input, is equivalent to a 3228 row-major ordering.. i.e for the following matrix, the input data expected is 3229 as shown: 3230 3231 1 0 0 3232 2 0 3 P0 3233 ------- 3234 4 5 6 P1 3235 3236 Process0 [P0]: rows_owned=[0,1] 3237 i = {0,1,3} [size = nrow+1 = 2+1] 3238 j = {0,0,2} [size = nz = 6] 3239 v = {1,2,3} [size = nz = 6] 3240 3241 Process1 [P1]: rows_owned=[2] 3242 i = {0,3} [size = nrow+1 = 1+1] 3243 j = {0,1,2} [size = nz = 6] 3244 v = {4,5,6} [size = nz = 6] 3245 3246 The column indices for each row MUST be sorted. 3247 3248 .keywords: matrix, aij, compressed row, sparse, parallel 3249 3250 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3251 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3252 @*/ 3253 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3254 { 3255 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 3256 3257 PetscFunctionBegin; 3258 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3259 if (f) { 3260 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 3261 } 3262 PetscFunctionReturn(0); 3263 } 3264 3265 #undef __FUNCT__ 3266 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3267 /*@C 3268 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3269 (the default parallel PETSc format). For good matrix assembly performance 3270 the user should preallocate the matrix storage by setting the parameters 3271 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3272 performance can be increased by more than a factor of 50. 3273 3274 Collective on MPI_Comm 3275 3276 Input Parameters: 3277 + A - the matrix 3278 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3279 (same value is used for all local rows) 3280 . d_nnz - array containing the number of nonzeros in the various rows of the 3281 DIAGONAL portion of the local submatrix (possibly different for each row) 3282 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3283 The size of this array is equal to the number of local rows, i.e 'm'. 3284 You must leave room for the diagonal entry even if it is zero. 3285 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3286 submatrix (same value is used for all local rows). 3287 - o_nnz - array containing the number of nonzeros in the various rows of the 3288 OFF-DIAGONAL portion of the local submatrix (possibly different for 3289 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3290 structure. The size of this array is equal to the number 3291 of local rows, i.e 'm'. 3292 3293 If the *_nnz parameter is given then the *_nz parameter is ignored 3294 3295 The AIJ format (also called the Yale sparse matrix format or 3296 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3297 storage. The stored row and column indices begin with zero. See the users manual for details. 3298 3299 The parallel matrix is partitioned such that the first m0 rows belong to 3300 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3301 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3302 3303 The DIAGONAL portion of the local submatrix of a processor can be defined 3304 as the submatrix which is obtained by extraction the part corresponding 3305 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 3306 first row that belongs to the processor, and r2 is the last row belonging 3307 to the this processor. This is a square mxm matrix. The remaining portion 3308 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 3309 3310 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3311 3312 You can call MatGetInfo() to get information on how effective the preallocation was; 3313 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3314 You can also run with the option -info and look for messages with the string 3315 malloc in them to see if additional memory allocation was needed. 3316 3317 Example usage: 3318 3319 Consider the following 8x8 matrix with 34 non-zero values, that is 3320 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3321 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3322 as follows: 3323 3324 .vb 3325 1 2 0 | 0 3 0 | 0 4 3326 Proc0 0 5 6 | 7 0 0 | 8 0 3327 9 0 10 | 11 0 0 | 12 0 3328 ------------------------------------- 3329 13 0 14 | 15 16 17 | 0 0 3330 Proc1 0 18 0 | 19 20 21 | 0 0 3331 0 0 0 | 22 23 0 | 24 0 3332 ------------------------------------- 3333 Proc2 25 26 27 | 0 0 28 | 29 0 3334 30 0 0 | 31 32 33 | 0 34 3335 .ve 3336 3337 This can be represented as a collection of submatrices as: 3338 3339 .vb 3340 A B C 3341 D E F 3342 G H I 3343 .ve 3344 3345 Where the submatrices A,B,C are owned by proc0, D,E,F are 3346 owned by proc1, G,H,I are owned by proc2. 3347 3348 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3349 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3350 The 'M','N' parameters are 8,8, and have the same values on all procs. 3351 3352 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3353 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3354 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3355 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3356 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3357 matrix, ans [DF] as another SeqAIJ matrix. 3358 3359 When d_nz, o_nz parameters are specified, d_nz storage elements are 3360 allocated for every row of the local diagonal submatrix, and o_nz 3361 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3362 One way to choose d_nz and o_nz is to use the max nonzerors per local 3363 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3364 In this case, the values of d_nz,o_nz are: 3365 .vb 3366 proc0 : dnz = 2, o_nz = 2 3367 proc1 : dnz = 3, o_nz = 2 3368 proc2 : dnz = 1, o_nz = 4 3369 .ve 3370 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3371 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3372 for proc3. i.e we are using 12+15+10=37 storage locations to store 3373 34 values. 3374 3375 When d_nnz, o_nnz parameters are specified, the storage is specified 3376 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3377 In the above case the values for d_nnz,o_nnz are: 3378 .vb 3379 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3380 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3381 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3382 .ve 3383 Here the space allocated is sum of all the above values i.e 34, and 3384 hence pre-allocation is perfect. 3385 3386 Level: intermediate 3387 3388 .keywords: matrix, aij, compressed row, sparse, parallel 3389 3390 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3391 MPIAIJ, MatGetInfo() 3392 @*/ 3393 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3394 { 3395 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3396 3397 PetscFunctionBegin; 3398 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3399 if (f) { 3400 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3401 } 3402 PetscFunctionReturn(0); 3403 } 3404 3405 #undef __FUNCT__ 3406 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3407 /*@ 3408 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3409 CSR format the local rows. 3410 3411 Collective on MPI_Comm 3412 3413 Input Parameters: 3414 + comm - MPI communicator 3415 . m - number of local rows (Cannot be PETSC_DECIDE) 3416 . n - This value should be the same as the local size used in creating the 3417 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3418 calculated if N is given) For square matrices n is almost always m. 3419 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3420 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3421 . i - row indices 3422 . j - column indices 3423 - a - matrix values 3424 3425 Output Parameter: 3426 . mat - the matrix 3427 3428 Level: intermediate 3429 3430 Notes: 3431 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3432 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3433 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3434 3435 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3436 3437 The format which is used for the sparse matrix input, is equivalent to a 3438 row-major ordering.. i.e for the following matrix, the input data expected is 3439 as shown: 3440 3441 1 0 0 3442 2 0 3 P0 3443 ------- 3444 4 5 6 P1 3445 3446 Process0 [P0]: rows_owned=[0,1] 3447 i = {0,1,3} [size = nrow+1 = 2+1] 3448 j = {0,0,2} [size = nz = 6] 3449 v = {1,2,3} [size = nz = 6] 3450 3451 Process1 [P1]: rows_owned=[2] 3452 i = {0,3} [size = nrow+1 = 1+1] 3453 j = {0,1,2} [size = nz = 6] 3454 v = {4,5,6} [size = nz = 6] 3455 3456 .keywords: matrix, aij, compressed row, sparse, parallel 3457 3458 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3459 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3460 @*/ 3461 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3462 { 3463 PetscErrorCode ierr; 3464 3465 PetscFunctionBegin; 3466 if (i[0]) { 3467 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3468 } 3469 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3470 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3471 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3472 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3473 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3474 PetscFunctionReturn(0); 3475 } 3476 3477 #undef __FUNCT__ 3478 #define __FUNCT__ "MatCreateMPIAIJ" 3479 /*@C 3480 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3481 (the default parallel PETSc format). For good matrix assembly performance 3482 the user should preallocate the matrix storage by setting the parameters 3483 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3484 performance can be increased by more than a factor of 50. 3485 3486 Collective on MPI_Comm 3487 3488 Input Parameters: 3489 + comm - MPI communicator 3490 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3491 This value should be the same as the local size used in creating the 3492 y vector for the matrix-vector product y = Ax. 3493 . n - This value should be the same as the local size used in creating the 3494 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3495 calculated if N is given) For square matrices n is almost always m. 3496 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3497 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3498 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3499 (same value is used for all local rows) 3500 . d_nnz - array containing the number of nonzeros in the various rows of the 3501 DIAGONAL portion of the local submatrix (possibly different for each row) 3502 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3503 The size of this array is equal to the number of local rows, i.e 'm'. 3504 You must leave room for the diagonal entry even if it is zero. 3505 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3506 submatrix (same value is used for all local rows). 3507 - o_nnz - array containing the number of nonzeros in the various rows of the 3508 OFF-DIAGONAL portion of the local submatrix (possibly different for 3509 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3510 structure. The size of this array is equal to the number 3511 of local rows, i.e 'm'. 3512 3513 Output Parameter: 3514 . A - the matrix 3515 3516 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3517 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3518 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3519 3520 Notes: 3521 If the *_nnz parameter is given then the *_nz parameter is ignored 3522 3523 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3524 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3525 storage requirements for this matrix. 3526 3527 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3528 processor than it must be used on all processors that share the object for 3529 that argument. 3530 3531 The user MUST specify either the local or global matrix dimensions 3532 (possibly both). 3533 3534 The parallel matrix is partitioned across processors such that the 3535 first m0 rows belong to process 0, the next m1 rows belong to 3536 process 1, the next m2 rows belong to process 2 etc.. where 3537 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3538 values corresponding to [m x N] submatrix. 3539 3540 The columns are logically partitioned with the n0 columns belonging 3541 to 0th partition, the next n1 columns belonging to the next 3542 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 3543 3544 The DIAGONAL portion of the local submatrix on any given processor 3545 is the submatrix corresponding to the rows and columns m,n 3546 corresponding to the given processor. i.e diagonal matrix on 3547 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3548 etc. The remaining portion of the local submatrix [m x (N-n)] 3549 constitute the OFF-DIAGONAL portion. The example below better 3550 illustrates this concept. 3551 3552 For a square global matrix we define each processor's diagonal portion 3553 to be its local rows and the corresponding columns (a square submatrix); 3554 each processor's off-diagonal portion encompasses the remainder of the 3555 local matrix (a rectangular submatrix). 3556 3557 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3558 3559 When calling this routine with a single process communicator, a matrix of 3560 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3561 type of communicator, use the construction mechanism: 3562 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 3563 3564 By default, this format uses inodes (identical nodes) when possible. 3565 We search for consecutive rows with the same nonzero structure, thereby 3566 reusing matrix information to achieve increased efficiency. 3567 3568 Options Database Keys: 3569 + -mat_no_inode - Do not use inodes 3570 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3571 - -mat_aij_oneindex - Internally use indexing starting at 1 3572 rather than 0. Note that when calling MatSetValues(), 3573 the user still MUST index entries starting at 0! 3574 3575 3576 Example usage: 3577 3578 Consider the following 8x8 matrix with 34 non-zero values, that is 3579 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3580 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3581 as follows: 3582 3583 .vb 3584 1 2 0 | 0 3 0 | 0 4 3585 Proc0 0 5 6 | 7 0 0 | 8 0 3586 9 0 10 | 11 0 0 | 12 0 3587 ------------------------------------- 3588 13 0 14 | 15 16 17 | 0 0 3589 Proc1 0 18 0 | 19 20 21 | 0 0 3590 0 0 0 | 22 23 0 | 24 0 3591 ------------------------------------- 3592 Proc2 25 26 27 | 0 0 28 | 29 0 3593 30 0 0 | 31 32 33 | 0 34 3594 .ve 3595 3596 This can be represented as a collection of submatrices as: 3597 3598 .vb 3599 A B C 3600 D E F 3601 G H I 3602 .ve 3603 3604 Where the submatrices A,B,C are owned by proc0, D,E,F are 3605 owned by proc1, G,H,I are owned by proc2. 3606 3607 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3608 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3609 The 'M','N' parameters are 8,8, and have the same values on all procs. 3610 3611 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3612 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3613 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3614 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3615 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3616 matrix, ans [DF] as another SeqAIJ matrix. 3617 3618 When d_nz, o_nz parameters are specified, d_nz storage elements are 3619 allocated for every row of the local diagonal submatrix, and o_nz 3620 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3621 One way to choose d_nz and o_nz is to use the max nonzerors per local 3622 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3623 In this case, the values of d_nz,o_nz are: 3624 .vb 3625 proc0 : dnz = 2, o_nz = 2 3626 proc1 : dnz = 3, o_nz = 2 3627 proc2 : dnz = 1, o_nz = 4 3628 .ve 3629 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3630 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3631 for proc3. i.e we are using 12+15+10=37 storage locations to store 3632 34 values. 3633 3634 When d_nnz, o_nnz parameters are specified, the storage is specified 3635 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3636 In the above case the values for d_nnz,o_nnz are: 3637 .vb 3638 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3639 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3640 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3641 .ve 3642 Here the space allocated is sum of all the above values i.e 34, and 3643 hence pre-allocation is perfect. 3644 3645 Level: intermediate 3646 3647 .keywords: matrix, aij, compressed row, sparse, parallel 3648 3649 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3650 MPIAIJ, MatCreateMPIAIJWithArrays() 3651 @*/ 3652 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3653 { 3654 PetscErrorCode ierr; 3655 PetscMPIInt size; 3656 3657 PetscFunctionBegin; 3658 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3659 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3660 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3661 if (size > 1) { 3662 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3663 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3664 } else { 3665 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3666 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3667 } 3668 PetscFunctionReturn(0); 3669 } 3670 3671 #undef __FUNCT__ 3672 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3673 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3674 { 3675 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3676 3677 PetscFunctionBegin; 3678 *Ad = a->A; 3679 *Ao = a->B; 3680 *colmap = a->garray; 3681 PetscFunctionReturn(0); 3682 } 3683 3684 #undef __FUNCT__ 3685 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3686 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3687 { 3688 PetscErrorCode ierr; 3689 PetscInt i; 3690 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3691 3692 PetscFunctionBegin; 3693 if (coloring->ctype == IS_COLORING_GLOBAL) { 3694 ISColoringValue *allcolors,*colors; 3695 ISColoring ocoloring; 3696 3697 /* set coloring for diagonal portion */ 3698 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3699 3700 /* set coloring for off-diagonal portion */ 3701 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3702 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3703 for (i=0; i<a->B->cmap->n; i++) { 3704 colors[i] = allcolors[a->garray[i]]; 3705 } 3706 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3707 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3708 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3709 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3710 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3711 ISColoringValue *colors; 3712 PetscInt *larray; 3713 ISColoring ocoloring; 3714 3715 /* set coloring for diagonal portion */ 3716 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3717 for (i=0; i<a->A->cmap->n; i++) { 3718 larray[i] = i + A->cmap->rstart; 3719 } 3720 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3721 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3722 for (i=0; i<a->A->cmap->n; i++) { 3723 colors[i] = coloring->colors[larray[i]]; 3724 } 3725 ierr = PetscFree(larray);CHKERRQ(ierr); 3726 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3727 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3728 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3729 3730 /* set coloring for off-diagonal portion */ 3731 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3732 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3733 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3734 for (i=0; i<a->B->cmap->n; i++) { 3735 colors[i] = coloring->colors[larray[i]]; 3736 } 3737 ierr = PetscFree(larray);CHKERRQ(ierr); 3738 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3739 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3740 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3741 } else { 3742 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3743 } 3744 3745 PetscFunctionReturn(0); 3746 } 3747 3748 #if defined(PETSC_HAVE_ADIC) 3749 #undef __FUNCT__ 3750 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3751 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3752 { 3753 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3754 PetscErrorCode ierr; 3755 3756 PetscFunctionBegin; 3757 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3758 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3759 PetscFunctionReturn(0); 3760 } 3761 #endif 3762 3763 #undef __FUNCT__ 3764 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3765 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3766 { 3767 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3768 PetscErrorCode ierr; 3769 3770 PetscFunctionBegin; 3771 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3772 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3773 PetscFunctionReturn(0); 3774 } 3775 3776 #undef __FUNCT__ 3777 #define __FUNCT__ "MatMerge" 3778 /*@ 3779 MatMerge - Creates a single large PETSc matrix by concatinating sequential 3780 matrices from each processor 3781 3782 Collective on MPI_Comm 3783 3784 Input Parameters: 3785 + comm - the communicators the parallel matrix will live on 3786 . inmat - the input sequential matrices 3787 . n - number of local columns (or PETSC_DECIDE) 3788 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3789 3790 Output Parameter: 3791 . outmat - the parallel matrix generated 3792 3793 Level: advanced 3794 3795 Notes: The number of columns of the matrix in EACH processor MUST be the same. 3796 3797 @*/ 3798 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3799 { 3800 PetscErrorCode ierr; 3801 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3802 PetscInt *indx; 3803 PetscScalar *values; 3804 3805 PetscFunctionBegin; 3806 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3807 if (scall == MAT_INITIAL_MATRIX){ 3808 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3809 if (n == PETSC_DECIDE){ 3810 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3811 } 3812 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3813 rstart -= m; 3814 3815 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3816 for (i=0;i<m;i++) { 3817 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3818 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3819 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3820 } 3821 /* This routine will ONLY return MPIAIJ type matrix */ 3822 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3823 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3824 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3825 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3826 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3827 3828 } else if (scall == MAT_REUSE_MATRIX){ 3829 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3830 } else { 3831 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3832 } 3833 3834 for (i=0;i<m;i++) { 3835 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3836 Ii = i + rstart; 3837 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3838 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3839 } 3840 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3841 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3842 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3843 3844 PetscFunctionReturn(0); 3845 } 3846 3847 #undef __FUNCT__ 3848 #define __FUNCT__ "MatFileSplit" 3849 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3850 { 3851 PetscErrorCode ierr; 3852 PetscMPIInt rank; 3853 PetscInt m,N,i,rstart,nnz; 3854 size_t len; 3855 const PetscInt *indx; 3856 PetscViewer out; 3857 char *name; 3858 Mat B; 3859 const PetscScalar *values; 3860 3861 PetscFunctionBegin; 3862 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3863 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3864 /* Should this be the type of the diagonal block of A? */ 3865 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3866 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3867 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3868 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3869 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3870 for (i=0;i<m;i++) { 3871 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3872 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3873 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3874 } 3875 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3876 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3877 3878 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 3879 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3880 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3881 sprintf(name,"%s.%d",outfile,rank); 3882 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3883 ierr = PetscFree(name); 3884 ierr = MatView(B,out);CHKERRQ(ierr); 3885 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3886 ierr = MatDestroy(B);CHKERRQ(ierr); 3887 PetscFunctionReturn(0); 3888 } 3889 3890 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3891 #undef __FUNCT__ 3892 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3893 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3894 { 3895 PetscErrorCode ierr; 3896 Mat_Merge_SeqsToMPI *merge; 3897 PetscContainer container; 3898 3899 PetscFunctionBegin; 3900 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3901 if (container) { 3902 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3903 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3904 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3905 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3906 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3907 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3908 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3909 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3910 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3911 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3912 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3913 ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr); 3914 3915 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 3916 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3917 } 3918 ierr = PetscFree(merge);CHKERRQ(ierr); 3919 3920 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3921 PetscFunctionReturn(0); 3922 } 3923 3924 #include "../src/mat/utils/freespace.h" 3925 #include "petscbt.h" 3926 3927 #undef __FUNCT__ 3928 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 3929 /*@C 3930 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 3931 matrices from each processor 3932 3933 Collective on MPI_Comm 3934 3935 Input Parameters: 3936 + comm - the communicators the parallel matrix will live on 3937 . seqmat - the input sequential matrices 3938 . m - number of local rows (or PETSC_DECIDE) 3939 . n - number of local columns (or PETSC_DECIDE) 3940 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3941 3942 Output Parameter: 3943 . mpimat - the parallel matrix generated 3944 3945 Level: advanced 3946 3947 Notes: 3948 The dimensions of the sequential matrix in each processor MUST be the same. 3949 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 3950 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 3951 @*/ 3952 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 3953 { 3954 PetscErrorCode ierr; 3955 MPI_Comm comm=((PetscObject)mpimat)->comm; 3956 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3957 PetscMPIInt size,rank,taga,*len_s; 3958 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 3959 PetscInt proc,m; 3960 PetscInt **buf_ri,**buf_rj; 3961 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3962 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3963 MPI_Request *s_waits,*r_waits; 3964 MPI_Status *status; 3965 MatScalar *aa=a->a; 3966 MatScalar **abuf_r,*ba_i; 3967 Mat_Merge_SeqsToMPI *merge; 3968 PetscContainer container; 3969 3970 PetscFunctionBegin; 3971 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3972 3973 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3974 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3975 3976 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3977 if (container) { 3978 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3979 } 3980 bi = merge->bi; 3981 bj = merge->bj; 3982 buf_ri = merge->buf_ri; 3983 buf_rj = merge->buf_rj; 3984 3985 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3986 owners = merge->rowmap.range; 3987 len_s = merge->len_s; 3988 3989 /* send and recv matrix values */ 3990 /*-----------------------------*/ 3991 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3992 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3993 3994 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 3995 for (proc=0,k=0; proc<size; proc++){ 3996 if (!len_s[proc]) continue; 3997 i = owners[proc]; 3998 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 3999 k++; 4000 } 4001 4002 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4003 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4004 ierr = PetscFree(status);CHKERRQ(ierr); 4005 4006 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4007 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4008 4009 /* insert mat values of mpimat */ 4010 /*----------------------------*/ 4011 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4012 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 4013 nextrow = buf_ri_k + merge->nrecv; 4014 nextai = nextrow + merge->nrecv; 4015 4016 for (k=0; k<merge->nrecv; k++){ 4017 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4018 nrows = *(buf_ri_k[k]); 4019 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4020 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4021 } 4022 4023 /* set values of ba */ 4024 m = merge->rowmap.n; 4025 for (i=0; i<m; i++) { 4026 arow = owners[rank] + i; 4027 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4028 bnzi = bi[i+1] - bi[i]; 4029 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4030 4031 /* add local non-zero vals of this proc's seqmat into ba */ 4032 anzi = ai[arow+1] - ai[arow]; 4033 aj = a->j + ai[arow]; 4034 aa = a->a + ai[arow]; 4035 nextaj = 0; 4036 for (j=0; nextaj<anzi; j++){ 4037 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4038 ba_i[j] += aa[nextaj++]; 4039 } 4040 } 4041 4042 /* add received vals into ba */ 4043 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4044 /* i-th row */ 4045 if (i == *nextrow[k]) { 4046 anzi = *(nextai[k]+1) - *nextai[k]; 4047 aj = buf_rj[k] + *(nextai[k]); 4048 aa = abuf_r[k] + *(nextai[k]); 4049 nextaj = 0; 4050 for (j=0; nextaj<anzi; j++){ 4051 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4052 ba_i[j] += aa[nextaj++]; 4053 } 4054 } 4055 nextrow[k]++; nextai[k]++; 4056 } 4057 } 4058 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4059 } 4060 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4061 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4062 4063 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4064 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4065 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 4066 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4067 PetscFunctionReturn(0); 4068 } 4069 4070 #undef __FUNCT__ 4071 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4072 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4073 { 4074 PetscErrorCode ierr; 4075 Mat B_mpi; 4076 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4077 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4078 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4079 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4080 PetscInt len,proc,*dnz,*onz; 4081 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4082 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4083 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4084 MPI_Status *status; 4085 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4086 PetscBT lnkbt; 4087 Mat_Merge_SeqsToMPI *merge; 4088 PetscContainer container; 4089 4090 PetscFunctionBegin; 4091 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4092 4093 /* make sure it is a PETSc comm */ 4094 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4095 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4096 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4097 4098 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4099 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4100 4101 /* determine row ownership */ 4102 /*---------------------------------------------------------*/ 4103 ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 4104 merge->rowmap.n = m; 4105 merge->rowmap.N = M; 4106 merge->rowmap.bs = 1; 4107 ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr); 4108 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4109 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4110 4111 m = merge->rowmap.n; 4112 M = merge->rowmap.N; 4113 owners = merge->rowmap.range; 4114 4115 /* determine the number of messages to send, their lengths */ 4116 /*---------------------------------------------------------*/ 4117 len_s = merge->len_s; 4118 4119 len = 0; /* length of buf_si[] */ 4120 merge->nsend = 0; 4121 for (proc=0; proc<size; proc++){ 4122 len_si[proc] = 0; 4123 if (proc == rank){ 4124 len_s[proc] = 0; 4125 } else { 4126 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4127 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4128 } 4129 if (len_s[proc]) { 4130 merge->nsend++; 4131 nrows = 0; 4132 for (i=owners[proc]; i<owners[proc+1]; i++){ 4133 if (ai[i+1] > ai[i]) nrows++; 4134 } 4135 len_si[proc] = 2*(nrows+1); 4136 len += len_si[proc]; 4137 } 4138 } 4139 4140 /* determine the number and length of messages to receive for ij-structure */ 4141 /*-------------------------------------------------------------------------*/ 4142 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4143 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4144 4145 /* post the Irecv of j-structure */ 4146 /*-------------------------------*/ 4147 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4148 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4149 4150 /* post the Isend of j-structure */ 4151 /*--------------------------------*/ 4152 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 4153 sj_waits = si_waits + merge->nsend; 4154 4155 for (proc=0, k=0; proc<size; proc++){ 4156 if (!len_s[proc]) continue; 4157 i = owners[proc]; 4158 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4159 k++; 4160 } 4161 4162 /* receives and sends of j-structure are complete */ 4163 /*------------------------------------------------*/ 4164 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4165 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4166 4167 /* send and recv i-structure */ 4168 /*---------------------------*/ 4169 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4170 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4171 4172 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4173 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4174 for (proc=0,k=0; proc<size; proc++){ 4175 if (!len_s[proc]) continue; 4176 /* form outgoing message for i-structure: 4177 buf_si[0]: nrows to be sent 4178 [1:nrows]: row index (global) 4179 [nrows+1:2*nrows+1]: i-structure index 4180 */ 4181 /*-------------------------------------------*/ 4182 nrows = len_si[proc]/2 - 1; 4183 buf_si_i = buf_si + nrows+1; 4184 buf_si[0] = nrows; 4185 buf_si_i[0] = 0; 4186 nrows = 0; 4187 for (i=owners[proc]; i<owners[proc+1]; i++){ 4188 anzi = ai[i+1] - ai[i]; 4189 if (anzi) { 4190 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4191 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4192 nrows++; 4193 } 4194 } 4195 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4196 k++; 4197 buf_si += len_si[proc]; 4198 } 4199 4200 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4201 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4202 4203 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4204 for (i=0; i<merge->nrecv; i++){ 4205 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4206 } 4207 4208 ierr = PetscFree(len_si);CHKERRQ(ierr); 4209 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4210 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4211 ierr = PetscFree(si_waits);CHKERRQ(ierr); 4212 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4213 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4214 ierr = PetscFree(status);CHKERRQ(ierr); 4215 4216 /* compute a local seq matrix in each processor */ 4217 /*----------------------------------------------*/ 4218 /* allocate bi array and free space for accumulating nonzero column info */ 4219 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4220 bi[0] = 0; 4221 4222 /* create and initialize a linked list */ 4223 nlnk = N+1; 4224 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4225 4226 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4227 len = 0; 4228 len = ai[owners[rank+1]] - ai[owners[rank]]; 4229 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4230 current_space = free_space; 4231 4232 /* determine symbolic info for each local row */ 4233 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 4234 nextrow = buf_ri_k + merge->nrecv; 4235 nextai = nextrow + merge->nrecv; 4236 for (k=0; k<merge->nrecv; k++){ 4237 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4238 nrows = *buf_ri_k[k]; 4239 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4240 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4241 } 4242 4243 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4244 len = 0; 4245 for (i=0;i<m;i++) { 4246 bnzi = 0; 4247 /* add local non-zero cols of this proc's seqmat into lnk */ 4248 arow = owners[rank] + i; 4249 anzi = ai[arow+1] - ai[arow]; 4250 aj = a->j + ai[arow]; 4251 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4252 bnzi += nlnk; 4253 /* add received col data into lnk */ 4254 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4255 if (i == *nextrow[k]) { /* i-th row */ 4256 anzi = *(nextai[k]+1) - *nextai[k]; 4257 aj = buf_rj[k] + *nextai[k]; 4258 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4259 bnzi += nlnk; 4260 nextrow[k]++; nextai[k]++; 4261 } 4262 } 4263 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4264 4265 /* if free space is not available, make more free space */ 4266 if (current_space->local_remaining<bnzi) { 4267 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4268 nspacedouble++; 4269 } 4270 /* copy data into free space, then initialize lnk */ 4271 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4272 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4273 4274 current_space->array += bnzi; 4275 current_space->local_used += bnzi; 4276 current_space->local_remaining -= bnzi; 4277 4278 bi[i+1] = bi[i] + bnzi; 4279 } 4280 4281 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 4282 4283 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4284 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4285 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4286 4287 /* create symbolic parallel matrix B_mpi */ 4288 /*---------------------------------------*/ 4289 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4290 if (n==PETSC_DECIDE) { 4291 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4292 } else { 4293 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4294 } 4295 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4296 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4297 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4298 4299 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4300 B_mpi->assembled = PETSC_FALSE; 4301 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4302 merge->bi = bi; 4303 merge->bj = bj; 4304 merge->buf_ri = buf_ri; 4305 merge->buf_rj = buf_rj; 4306 merge->coi = PETSC_NULL; 4307 merge->coj = PETSC_NULL; 4308 merge->owners_co = PETSC_NULL; 4309 4310 /* attach the supporting struct to B_mpi for reuse */ 4311 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4312 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4313 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4314 *mpimat = B_mpi; 4315 4316 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4317 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4318 PetscFunctionReturn(0); 4319 } 4320 4321 #undef __FUNCT__ 4322 #define __FUNCT__ "MatMerge_SeqsToMPI" 4323 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4324 { 4325 PetscErrorCode ierr; 4326 4327 PetscFunctionBegin; 4328 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4329 if (scall == MAT_INITIAL_MATRIX){ 4330 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4331 } 4332 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4333 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4334 PetscFunctionReturn(0); 4335 } 4336 4337 #undef __FUNCT__ 4338 #define __FUNCT__ "MatGetLocalMat" 4339 /*@ 4340 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4341 4342 Not Collective 4343 4344 Input Parameters: 4345 + A - the matrix 4346 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4347 4348 Output Parameter: 4349 . A_loc - the local sequential matrix generated 4350 4351 Level: developer 4352 4353 @*/ 4354 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4355 { 4356 PetscErrorCode ierr; 4357 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4358 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4359 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4360 MatScalar *aa=a->a,*ba=b->a,*cam; 4361 PetscScalar *ca; 4362 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4363 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4364 4365 PetscFunctionBegin; 4366 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4367 if (scall == MAT_INITIAL_MATRIX){ 4368 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4369 ci[0] = 0; 4370 for (i=0; i<am; i++){ 4371 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4372 } 4373 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4374 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4375 k = 0; 4376 for (i=0; i<am; i++) { 4377 ncols_o = bi[i+1] - bi[i]; 4378 ncols_d = ai[i+1] - ai[i]; 4379 /* off-diagonal portion of A */ 4380 for (jo=0; jo<ncols_o; jo++) { 4381 col = cmap[*bj]; 4382 if (col >= cstart) break; 4383 cj[k] = col; bj++; 4384 ca[k++] = *ba++; 4385 } 4386 /* diagonal portion of A */ 4387 for (j=0; j<ncols_d; j++) { 4388 cj[k] = cstart + *aj++; 4389 ca[k++] = *aa++; 4390 } 4391 /* off-diagonal portion of A */ 4392 for (j=jo; j<ncols_o; j++) { 4393 cj[k] = cmap[*bj++]; 4394 ca[k++] = *ba++; 4395 } 4396 } 4397 /* put together the new matrix */ 4398 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4399 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4400 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4401 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4402 mat->free_a = PETSC_TRUE; 4403 mat->free_ij = PETSC_TRUE; 4404 mat->nonew = 0; 4405 } else if (scall == MAT_REUSE_MATRIX){ 4406 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4407 ci = mat->i; cj = mat->j; cam = mat->a; 4408 for (i=0; i<am; i++) { 4409 /* off-diagonal portion of A */ 4410 ncols_o = bi[i+1] - bi[i]; 4411 for (jo=0; jo<ncols_o; jo++) { 4412 col = cmap[*bj]; 4413 if (col >= cstart) break; 4414 *cam++ = *ba++; bj++; 4415 } 4416 /* diagonal portion of A */ 4417 ncols_d = ai[i+1] - ai[i]; 4418 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4419 /* off-diagonal portion of A */ 4420 for (j=jo; j<ncols_o; j++) { 4421 *cam++ = *ba++; bj++; 4422 } 4423 } 4424 } else { 4425 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4426 } 4427 4428 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4429 PetscFunctionReturn(0); 4430 } 4431 4432 #undef __FUNCT__ 4433 #define __FUNCT__ "MatGetLocalMatCondensed" 4434 /*@C 4435 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4436 4437 Not Collective 4438 4439 Input Parameters: 4440 + A - the matrix 4441 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4442 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4443 4444 Output Parameter: 4445 . A_loc - the local sequential matrix generated 4446 4447 Level: developer 4448 4449 @*/ 4450 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4451 { 4452 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4453 PetscErrorCode ierr; 4454 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4455 IS isrowa,iscola; 4456 Mat *aloc; 4457 4458 PetscFunctionBegin; 4459 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4460 if (!row){ 4461 start = A->rmap->rstart; end = A->rmap->rend; 4462 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4463 } else { 4464 isrowa = *row; 4465 } 4466 if (!col){ 4467 start = A->cmap->rstart; 4468 cmap = a->garray; 4469 nzA = a->A->cmap->n; 4470 nzB = a->B->cmap->n; 4471 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4472 ncols = 0; 4473 for (i=0; i<nzB; i++) { 4474 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4475 else break; 4476 } 4477 imark = i; 4478 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4479 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4480 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4481 ierr = PetscFree(idx);CHKERRQ(ierr); 4482 } else { 4483 iscola = *col; 4484 } 4485 if (scall != MAT_INITIAL_MATRIX){ 4486 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4487 aloc[0] = *A_loc; 4488 } 4489 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4490 *A_loc = aloc[0]; 4491 ierr = PetscFree(aloc);CHKERRQ(ierr); 4492 if (!row){ 4493 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4494 } 4495 if (!col){ 4496 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4497 } 4498 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4499 PetscFunctionReturn(0); 4500 } 4501 4502 #undef __FUNCT__ 4503 #define __FUNCT__ "MatGetBrowsOfAcols" 4504 /*@C 4505 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4506 4507 Collective on Mat 4508 4509 Input Parameters: 4510 + A,B - the matrices in mpiaij format 4511 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4512 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4513 4514 Output Parameter: 4515 + rowb, colb - index sets of rows and columns of B to extract 4516 . brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows 4517 - B_seq - the sequential matrix generated 4518 4519 Level: developer 4520 4521 @*/ 4522 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4523 { 4524 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4525 PetscErrorCode ierr; 4526 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4527 IS isrowb,iscolb; 4528 Mat *bseq; 4529 4530 PetscFunctionBegin; 4531 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4532 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4533 } 4534 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4535 4536 if (scall == MAT_INITIAL_MATRIX){ 4537 start = A->cmap->rstart; 4538 cmap = a->garray; 4539 nzA = a->A->cmap->n; 4540 nzB = a->B->cmap->n; 4541 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4542 ncols = 0; 4543 for (i=0; i<nzB; i++) { /* row < local row index */ 4544 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4545 else break; 4546 } 4547 imark = i; 4548 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4549 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4550 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4551 ierr = PetscFree(idx);CHKERRQ(ierr); 4552 *brstart = imark; 4553 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4554 } else { 4555 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4556 isrowb = *rowb; iscolb = *colb; 4557 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4558 bseq[0] = *B_seq; 4559 } 4560 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4561 *B_seq = bseq[0]; 4562 ierr = PetscFree(bseq);CHKERRQ(ierr); 4563 if (!rowb){ 4564 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4565 } else { 4566 *rowb = isrowb; 4567 } 4568 if (!colb){ 4569 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4570 } else { 4571 *colb = iscolb; 4572 } 4573 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4574 PetscFunctionReturn(0); 4575 } 4576 4577 #undef __FUNCT__ 4578 #define __FUNCT__ "MatGetBrowsOfAoCols" 4579 /*@C 4580 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4581 of the OFF-DIAGONAL portion of local A 4582 4583 Collective on Mat 4584 4585 Input Parameters: 4586 + A,B - the matrices in mpiaij format 4587 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4588 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4589 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4590 4591 Output Parameter: 4592 + B_oth - the sequential matrix generated 4593 4594 Level: developer 4595 4596 @*/ 4597 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth) 4598 { 4599 VecScatter_MPI_General *gen_to,*gen_from; 4600 PetscErrorCode ierr; 4601 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4602 Mat_SeqAIJ *b_oth; 4603 VecScatter ctx=a->Mvctx; 4604 MPI_Comm comm=((PetscObject)ctx)->comm; 4605 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4606 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4607 PetscScalar *rvalues,*svalues; 4608 MatScalar *b_otha,*bufa,*bufA; 4609 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4610 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4611 MPI_Status *sstatus,rstatus; 4612 PetscMPIInt jj; 4613 PetscInt *cols,sbs,rbs; 4614 PetscScalar *vals; 4615 4616 PetscFunctionBegin; 4617 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4618 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4619 } 4620 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4621 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4622 4623 gen_to = (VecScatter_MPI_General*)ctx->todata; 4624 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4625 rvalues = gen_from->values; /* holds the length of receiving row */ 4626 svalues = gen_to->values; /* holds the length of sending row */ 4627 nrecvs = gen_from->n; 4628 nsends = gen_to->n; 4629 4630 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4631 srow = gen_to->indices; /* local row index to be sent */ 4632 sstarts = gen_to->starts; 4633 sprocs = gen_to->procs; 4634 sstatus = gen_to->sstatus; 4635 sbs = gen_to->bs; 4636 rstarts = gen_from->starts; 4637 rprocs = gen_from->procs; 4638 rbs = gen_from->bs; 4639 4640 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4641 if (scall == MAT_INITIAL_MATRIX){ 4642 /* i-array */ 4643 /*---------*/ 4644 /* post receives */ 4645 for (i=0; i<nrecvs; i++){ 4646 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4647 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4648 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4649 } 4650 4651 /* pack the outgoing message */ 4652 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 4653 rstartsj = sstartsj + nsends +1; 4654 sstartsj[0] = 0; rstartsj[0] = 0; 4655 len = 0; /* total length of j or a array to be sent */ 4656 k = 0; 4657 for (i=0; i<nsends; i++){ 4658 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4659 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4660 for (j=0; j<nrows; j++) { 4661 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4662 for (l=0; l<sbs; l++){ 4663 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4664 rowlen[j*sbs+l] = ncols; 4665 len += ncols; 4666 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4667 } 4668 k++; 4669 } 4670 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4671 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4672 } 4673 /* recvs and sends of i-array are completed */ 4674 i = nrecvs; 4675 while (i--) { 4676 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4677 } 4678 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4679 4680 /* allocate buffers for sending j and a arrays */ 4681 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4682 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4683 4684 /* create i-array of B_oth */ 4685 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4686 b_othi[0] = 0; 4687 len = 0; /* total length of j or a array to be received */ 4688 k = 0; 4689 for (i=0; i<nrecvs; i++){ 4690 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4691 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 4692 for (j=0; j<nrows; j++) { 4693 b_othi[k+1] = b_othi[k] + rowlen[j]; 4694 len += rowlen[j]; k++; 4695 } 4696 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4697 } 4698 4699 /* allocate space for j and a arrrays of B_oth */ 4700 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4701 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 4702 4703 /* j-array */ 4704 /*---------*/ 4705 /* post receives of j-array */ 4706 for (i=0; i<nrecvs; i++){ 4707 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4708 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4709 } 4710 4711 /* pack the outgoing message j-array */ 4712 k = 0; 4713 for (i=0; i<nsends; i++){ 4714 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4715 bufJ = bufj+sstartsj[i]; 4716 for (j=0; j<nrows; j++) { 4717 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4718 for (ll=0; ll<sbs; ll++){ 4719 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4720 for (l=0; l<ncols; l++){ 4721 *bufJ++ = cols[l]; 4722 } 4723 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4724 } 4725 } 4726 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4727 } 4728 4729 /* recvs and sends of j-array are completed */ 4730 i = nrecvs; 4731 while (i--) { 4732 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4733 } 4734 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4735 } else if (scall == MAT_REUSE_MATRIX){ 4736 sstartsj = *startsj; 4737 rstartsj = sstartsj + nsends +1; 4738 bufa = *bufa_ptr; 4739 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4740 b_otha = b_oth->a; 4741 } else { 4742 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 4743 } 4744 4745 /* a-array */ 4746 /*---------*/ 4747 /* post receives of a-array */ 4748 for (i=0; i<nrecvs; i++){ 4749 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4750 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4751 } 4752 4753 /* pack the outgoing message a-array */ 4754 k = 0; 4755 for (i=0; i<nsends; i++){ 4756 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4757 bufA = bufa+sstartsj[i]; 4758 for (j=0; j<nrows; j++) { 4759 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4760 for (ll=0; ll<sbs; ll++){ 4761 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4762 for (l=0; l<ncols; l++){ 4763 *bufA++ = vals[l]; 4764 } 4765 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4766 } 4767 } 4768 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4769 } 4770 /* recvs and sends of a-array are completed */ 4771 i = nrecvs; 4772 while (i--) { 4773 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4774 } 4775 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4776 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4777 4778 if (scall == MAT_INITIAL_MATRIX){ 4779 /* put together the new matrix */ 4780 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4781 4782 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4783 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4784 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4785 b_oth->free_a = PETSC_TRUE; 4786 b_oth->free_ij = PETSC_TRUE; 4787 b_oth->nonew = 0; 4788 4789 ierr = PetscFree(bufj);CHKERRQ(ierr); 4790 if (!startsj || !bufa_ptr){ 4791 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 4792 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4793 } else { 4794 *startsj = sstartsj; 4795 *bufa_ptr = bufa; 4796 } 4797 } 4798 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4799 PetscFunctionReturn(0); 4800 } 4801 4802 #undef __FUNCT__ 4803 #define __FUNCT__ "MatGetCommunicationStructs" 4804 /*@C 4805 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4806 4807 Not Collective 4808 4809 Input Parameters: 4810 . A - The matrix in mpiaij format 4811 4812 Output Parameter: 4813 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4814 . colmap - A map from global column index to local index into lvec 4815 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4816 4817 Level: developer 4818 4819 @*/ 4820 #if defined (PETSC_USE_CTABLE) 4821 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4822 #else 4823 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4824 #endif 4825 { 4826 Mat_MPIAIJ *a; 4827 4828 PetscFunctionBegin; 4829 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 4830 PetscValidPointer(lvec, 2) 4831 PetscValidPointer(colmap, 3) 4832 PetscValidPointer(multScatter, 4) 4833 a = (Mat_MPIAIJ *) A->data; 4834 if (lvec) *lvec = a->lvec; 4835 if (colmap) *colmap = a->colmap; 4836 if (multScatter) *multScatter = a->Mvctx; 4837 PetscFunctionReturn(0); 4838 } 4839 4840 EXTERN_C_BEGIN 4841 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*); 4842 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*); 4843 EXTERN_C_END 4844 4845 #include "../src/mat/impls/dense/mpi/mpidense.h" 4846 4847 #undef __FUNCT__ 4848 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4849 /* 4850 Computes (B'*A')' since computing B*A directly is untenable 4851 4852 n p p 4853 ( ) ( ) ( ) 4854 m ( A ) * n ( B ) = m ( C ) 4855 ( ) ( ) ( ) 4856 4857 */ 4858 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4859 { 4860 PetscErrorCode ierr; 4861 Mat At,Bt,Ct; 4862 4863 PetscFunctionBegin; 4864 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4865 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4866 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4867 ierr = MatDestroy(At);CHKERRQ(ierr); 4868 ierr = MatDestroy(Bt);CHKERRQ(ierr); 4869 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4870 ierr = MatDestroy(Ct);CHKERRQ(ierr); 4871 PetscFunctionReturn(0); 4872 } 4873 4874 #undef __FUNCT__ 4875 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4876 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4877 { 4878 PetscErrorCode ierr; 4879 PetscInt m=A->rmap->n,n=B->cmap->n; 4880 Mat Cmat; 4881 4882 PetscFunctionBegin; 4883 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 4884 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 4885 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4886 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 4887 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 4888 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4889 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4890 *C = Cmat; 4891 PetscFunctionReturn(0); 4892 } 4893 4894 /* ----------------------------------------------------------------*/ 4895 #undef __FUNCT__ 4896 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 4897 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 4898 { 4899 PetscErrorCode ierr; 4900 4901 PetscFunctionBegin; 4902 if (scall == MAT_INITIAL_MATRIX){ 4903 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 4904 } 4905 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 4906 PetscFunctionReturn(0); 4907 } 4908 4909 EXTERN_C_BEGIN 4910 #if defined(PETSC_HAVE_MUMPS) 4911 extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*); 4912 #endif 4913 #if defined(PETSC_HAVE_PASTIX) 4914 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 4915 #endif 4916 #if defined(PETSC_HAVE_SUPERLU_DIST) 4917 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 4918 #endif 4919 #if defined(PETSC_HAVE_SPOOLES) 4920 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 4921 #endif 4922 EXTERN_C_END 4923 4924 /*MC 4925 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 4926 4927 Options Database Keys: 4928 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 4929 4930 Level: beginner 4931 4932 .seealso: MatCreateMPIAIJ() 4933 M*/ 4934 4935 EXTERN_C_BEGIN 4936 #undef __FUNCT__ 4937 #define __FUNCT__ "MatCreate_MPIAIJ" 4938 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 4939 { 4940 Mat_MPIAIJ *b; 4941 PetscErrorCode ierr; 4942 PetscMPIInt size; 4943 4944 PetscFunctionBegin; 4945 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 4946 4947 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 4948 B->data = (void*)b; 4949 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 4950 B->rmap->bs = 1; 4951 B->assembled = PETSC_FALSE; 4952 B->mapping = 0; 4953 4954 B->insertmode = NOT_SET_VALUES; 4955 b->size = size; 4956 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 4957 4958 /* build cache for off array entries formed */ 4959 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 4960 b->donotstash = PETSC_FALSE; 4961 b->colmap = 0; 4962 b->garray = 0; 4963 b->roworiented = PETSC_TRUE; 4964 4965 /* stuff used for matrix vector multiply */ 4966 b->lvec = PETSC_NULL; 4967 b->Mvctx = PETSC_NULL; 4968 4969 /* stuff for MatGetRow() */ 4970 b->rowindices = 0; 4971 b->rowvalues = 0; 4972 b->getrowactive = PETSC_FALSE; 4973 4974 #if defined(PETSC_HAVE_SPOOLES) 4975 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_spooles_C", 4976 "MatGetFactor_mpiaij_spooles", 4977 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 4978 #endif 4979 #if defined(PETSC_HAVE_MUMPS) 4980 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_mumps_C", 4981 "MatGetFactor_mpiaij_mumps", 4982 MatGetFactor_mpiaij_mumps);CHKERRQ(ierr); 4983 #endif 4984 #if defined(PETSC_HAVE_PASTIX) 4985 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_pastix_C", 4986 "MatGetFactor_mpiaij_pastix", 4987 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 4988 #endif 4989 #if defined(PETSC_HAVE_SUPERLU_DIST) 4990 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_superlu_dist_C", 4991 "MatGetFactor_mpiaij_superlu_dist", 4992 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 4993 #endif 4994 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 4995 "MatStoreValues_MPIAIJ", 4996 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 4997 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 4998 "MatRetrieveValues_MPIAIJ", 4999 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5000 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5001 "MatGetDiagonalBlock_MPIAIJ", 5002 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5003 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5004 "MatIsTranspose_MPIAIJ", 5005 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5006 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5007 "MatMPIAIJSetPreallocation_MPIAIJ", 5008 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5009 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5010 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5011 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5012 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5013 "MatDiagonalScaleLocal_MPIAIJ", 5014 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5015 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C", 5016 "MatConvert_MPIAIJ_MPICSRPERM", 5017 MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr); 5018 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C", 5019 "MatConvert_MPIAIJ_MPICRL", 5020 MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr); 5021 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5022 "MatMatMult_MPIDense_MPIAIJ", 5023 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5024 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5025 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5026 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5027 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5028 "MatMatMultNumeric_MPIDense_MPIAIJ", 5029 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5030 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5031 PetscFunctionReturn(0); 5032 } 5033 EXTERN_C_END 5034 5035 #undef __FUNCT__ 5036 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5037 /*@ 5038 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5039 and "off-diagonal" part of the matrix in CSR format. 5040 5041 Collective on MPI_Comm 5042 5043 Input Parameters: 5044 + comm - MPI communicator 5045 . m - number of local rows (Cannot be PETSC_DECIDE) 5046 . n - This value should be the same as the local size used in creating the 5047 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5048 calculated if N is given) For square matrices n is almost always m. 5049 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5050 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5051 . i - row indices for "diagonal" portion of matrix 5052 . j - column indices 5053 . a - matrix values 5054 . oi - row indices for "off-diagonal" portion of matrix 5055 . oj - column indices 5056 - oa - matrix values 5057 5058 Output Parameter: 5059 . mat - the matrix 5060 5061 Level: advanced 5062 5063 Notes: 5064 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 5065 5066 The i and j indices are 0 based 5067 5068 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5069 5070 5071 .keywords: matrix, aij, compressed row, sparse, parallel 5072 5073 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5074 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5075 @*/ 5076 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5077 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5078 { 5079 PetscErrorCode ierr; 5080 Mat_MPIAIJ *maij; 5081 5082 PetscFunctionBegin; 5083 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5084 if (i[0]) { 5085 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5086 } 5087 if (oi[0]) { 5088 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5089 } 5090 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5091 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5092 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5093 maij = (Mat_MPIAIJ*) (*mat)->data; 5094 maij->donotstash = PETSC_TRUE; 5095 (*mat)->preallocated = PETSC_TRUE; 5096 5097 ierr = PetscMapSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5098 ierr = PetscMapSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5099 ierr = PetscMapSetUp((*mat)->rmap);CHKERRQ(ierr); 5100 ierr = PetscMapSetUp((*mat)->cmap);CHKERRQ(ierr); 5101 5102 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5103 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5104 5105 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5106 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5107 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5108 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5109 5110 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5111 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5112 PetscFunctionReturn(0); 5113 } 5114 5115 /* 5116 Special version for direct calls from Fortran 5117 */ 5118 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5119 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5120 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5121 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5122 #endif 5123 5124 /* Change these macros so can be used in void function */ 5125 #undef CHKERRQ 5126 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5127 #undef SETERRQ2 5128 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5129 #undef SETERRQ 5130 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5131 5132 EXTERN_C_BEGIN 5133 #undef __FUNCT__ 5134 #define __FUNCT__ "matsetvaluesmpiaij_" 5135 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5136 { 5137 Mat mat = *mmat; 5138 PetscInt m = *mm, n = *mn; 5139 InsertMode addv = *maddv; 5140 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5141 PetscScalar value; 5142 PetscErrorCode ierr; 5143 5144 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5145 if (mat->insertmode == NOT_SET_VALUES) { 5146 mat->insertmode = addv; 5147 } 5148 #if defined(PETSC_USE_DEBUG) 5149 else if (mat->insertmode != addv) { 5150 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5151 } 5152 #endif 5153 { 5154 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5155 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5156 PetscTruth roworiented = aij->roworiented; 5157 5158 /* Some Variables required in the macro */ 5159 Mat A = aij->A; 5160 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5161 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5162 MatScalar *aa = a->a; 5163 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5164 Mat B = aij->B; 5165 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5166 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5167 MatScalar *ba = b->a; 5168 5169 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5170 PetscInt nonew = a->nonew; 5171 MatScalar *ap1,*ap2; 5172 5173 PetscFunctionBegin; 5174 for (i=0; i<m; i++) { 5175 if (im[i] < 0) continue; 5176 #if defined(PETSC_USE_DEBUG) 5177 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5178 #endif 5179 if (im[i] >= rstart && im[i] < rend) { 5180 row = im[i] - rstart; 5181 lastcol1 = -1; 5182 rp1 = aj + ai[row]; 5183 ap1 = aa + ai[row]; 5184 rmax1 = aimax[row]; 5185 nrow1 = ailen[row]; 5186 low1 = 0; 5187 high1 = nrow1; 5188 lastcol2 = -1; 5189 rp2 = bj + bi[row]; 5190 ap2 = ba + bi[row]; 5191 rmax2 = bimax[row]; 5192 nrow2 = bilen[row]; 5193 low2 = 0; 5194 high2 = nrow2; 5195 5196 for (j=0; j<n; j++) { 5197 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5198 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5199 if (in[j] >= cstart && in[j] < cend){ 5200 col = in[j] - cstart; 5201 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5202 } else if (in[j] < 0) continue; 5203 #if defined(PETSC_USE_DEBUG) 5204 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 5205 #endif 5206 else { 5207 if (mat->was_assembled) { 5208 if (!aij->colmap) { 5209 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5210 } 5211 #if defined (PETSC_USE_CTABLE) 5212 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5213 col--; 5214 #else 5215 col = aij->colmap[in[j]] - 1; 5216 #endif 5217 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5218 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5219 col = in[j]; 5220 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5221 B = aij->B; 5222 b = (Mat_SeqAIJ*)B->data; 5223 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5224 rp2 = bj + bi[row]; 5225 ap2 = ba + bi[row]; 5226 rmax2 = bimax[row]; 5227 nrow2 = bilen[row]; 5228 low2 = 0; 5229 high2 = nrow2; 5230 bm = aij->B->rmap->n; 5231 ba = b->a; 5232 } 5233 } else col = in[j]; 5234 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5235 } 5236 } 5237 } else { 5238 if (!aij->donotstash) { 5239 if (roworiented) { 5240 if (ignorezeroentries && v[i*n] == 0.0) continue; 5241 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 5242 } else { 5243 if (ignorezeroentries && v[i] == 0.0) continue; 5244 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 5245 } 5246 } 5247 } 5248 }} 5249 PetscFunctionReturnVoid(); 5250 } 5251 EXTERN_C_END 5252 5253