1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 4 #include "../src/inline/spops.h" 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "MatDistribute_MPIAIJ" 8 /* 9 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 10 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 11 12 Only for square matrices 13 */ 14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 15 { 16 PetscMPIInt rank,size; 17 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 18 PetscErrorCode ierr; 19 Mat mat; 20 Mat_SeqAIJ *gmata; 21 PetscMPIInt tag; 22 MPI_Status status; 23 PetscTruth aij; 24 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 25 26 PetscFunctionBegin; 27 CHKMEMQ; 28 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 29 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 30 if (!rank) { 31 ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 32 if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 33 } 34 if (reuse == MAT_INITIAL_MATRIX) { 35 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 36 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 37 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 38 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 39 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 40 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 41 rowners[0] = 0; 42 for (i=2; i<=size; i++) { 43 rowners[i] += rowners[i-1]; 44 } 45 rstart = rowners[rank]; 46 rend = rowners[rank+1]; 47 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 48 if (!rank) { 49 gmata = (Mat_SeqAIJ*) gmat->data; 50 /* send row lengths to all processors */ 51 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 52 for (i=1; i<size; i++) { 53 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 54 } 55 /* determine number diagonal and off-diagonal counts */ 56 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 57 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 58 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 59 jj = 0; 60 for (i=0; i<m; i++) { 61 for (j=0; j<dlens[i]; j++) { 62 if (gmata->j[jj] < rstart) ld[i]++; 63 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 64 jj++; 65 } 66 } 67 /* send column indices to other processes */ 68 for (i=1; i<size; i++) { 69 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 70 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 71 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 72 } 73 74 /* send numerical values to other processes */ 75 for (i=1; i<size; i++) { 76 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 77 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 78 } 79 gmataa = gmata->a; 80 gmataj = gmata->j; 81 82 } else { 83 /* receive row lengths */ 84 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 85 /* receive column indices */ 86 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 87 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 88 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 89 /* determine number diagonal and off-diagonal counts */ 90 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 91 
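    /*
       The loop that follows classifies the columns of each locally owned row: global column
       indices in [rstart,rend) belong to the diagonal block, everything else is off-diagonal
       (counted in olens[]), and ld[i] records how many off-diagonal entries of row i lie to
       the left of the diagonal block.  For example, with rstart = 2 and rend = 4, a row with
       global columns {0,2,3,5} gives olens[i] = 2 and ld[i] = 1; after the later
       "dlens[i] -= olens[i]" its diagonal count is 2.
    */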
ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 92 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 93 jj = 0; 94 for (i=0; i<m; i++) { 95 for (j=0; j<dlens[i]; j++) { 96 if (gmataj[jj] < rstart) ld[i]++; 97 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 98 jj++; 99 } 100 } 101 /* receive numerical values */ 102 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 104 } 105 /* set preallocation */ 106 for (i=0; i<m; i++) { 107 dlens[i] -= olens[i]; 108 } 109 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 110 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 111 112 for (i=0; i<m; i++) { 113 dlens[i] += olens[i]; 114 } 115 cnt = 0; 116 for (i=0; i<m; i++) { 117 row = rstart + i; 118 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 119 cnt += dlens[i]; 120 } 121 if (rank) { 122 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 123 } 124 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 125 ierr = PetscFree(rowners);CHKERRQ(ierr); 126 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 127 *inmat = mat; 128 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 129 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 130 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 131 mat = *inmat; 132 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 133 if (!rank) { 134 /* send numerical values to other processes */ 135 gmata = (Mat_SeqAIJ*) gmat->data; 136 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 137 gmataa = gmata->a; 138 for (i=1; i<size; i++) { 139 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 140 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 141 } 142 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 143 } else { 144 /* receive numerical values from process 0*/ 145 nz = Ad->nz + Ao->nz; 146 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 147 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 148 } 149 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 150 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 151 ad = Ad->a; 152 ao = Ao->a; 153 if (mat->rmap->n) { 154 i = 0; 155 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 156 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 157 } 158 for (i=1; i<mat->rmap->n; i++) { 159 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 160 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 161 } 162 i--; 163 if (mat->rmap->n) { 164 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 165 } 166 if (rank) { 167 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 168 } 169 } 170 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 171 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 172 CHKMEMQ; 173 PetscFunctionReturn(0); 174 } 175 176 /* 177 Local utility routine that creates a mapping from the global column 178 number to the local number in the off-diagonal 
   part of the local storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
   a slightly higher hash table cost; without it, it is not scalable (each processor
   has an order N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}


#define CHUNKSIZE 15

/* Insert (or add to) a single entry of the diagonal (A) block of one row; rp1/ap1 point at the
   row's column indices and values and nrow1 is its current length */
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
  if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
  lastcol1 = col;\
  while (high1-low1 > 5) { \
    t = (low1+high1)/2; \
    if (rp1[t] > col) high1 = t; \
    else low1 = t; \
  } \
  for (_i=low1; _i<high1; _i++) { \
    if (rp1[_i] > col) break; \
    if (rp1[_i] == col) { \
      if (addv == ADD_VALUES) ap1[_i] += value; \
      else ap1[_i] = value; \
      goto a_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
  if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
  if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
  N = nrow1++ - 1; a->nz++; high1++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp1[ii+1] = rp1[ii]; \
    ap1[ii+1] = ap1[ii]; \
  } \
  rp1[_i] = col; \
  ap1[_i] = value; \
  a_noinsert: ; \
  ailen[row] = nrow1; \
}


/* Same as MatSetValues_SeqAIJ_A_Private() but for the off-diagonal (B) block of the row */
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
  if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
  lastcol2 = col;\
  while (high2-low2 > 5) { \
    t = (low2+high2)/2; \
    if (rp2[t] > col) high2 = t; \
    else low2 = t; \
  } \
  for (_i=low2; _i<high2; _i++) { \
    if (rp2[_i] > col) break; \
    if (rp2[_i] == col) { \
      if (addv == ADD_VALUES) ap2[_i] += value; \
      else ap2[_i] = value; \
      goto b_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
  if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
  if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
  N = nrow2++ - 1; b->nz++; high2++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp2[ii+1] = rp2[ii]; \
    ap2[ii+1] = ap2[ii]; \
  } \
  rp2[_i] = col; \
  ap2[_i] = value; \
  b_noinsert: ; \
  bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{ 279 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 280 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 281 PetscErrorCode ierr; 282 PetscInt l,*garray = mat->garray,diag; 283 284 PetscFunctionBegin; 285 /* code only works for square matrices A */ 286 287 /* find size of row to the left of the diagonal part */ 288 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 289 row = row - diag; 290 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 291 if (garray[b->j[b->i[row]+l]] > diag) break; 292 } 293 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 294 295 /* diagonal part */ 296 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 297 298 /* right of diagonal part */ 299 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 300 PetscFunctionReturn(0); 301 } 302 303 #undef __FUNCT__ 304 #define __FUNCT__ "MatSetValues_MPIAIJ" 305 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 306 { 307 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 308 PetscScalar value; 309 PetscErrorCode ierr; 310 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 311 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 312 PetscTruth roworiented = aij->roworiented; 313 314 /* Some Variables required in the macro */ 315 Mat A = aij->A; 316 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 317 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 318 MatScalar *aa = a->a; 319 PetscTruth ignorezeroentries = a->ignorezeroentries; 320 Mat B = aij->B; 321 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 322 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 323 MatScalar *ba = b->a; 324 325 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 326 PetscInt nonew = a->nonew; 327 MatScalar *ap1,*ap2; 328 329 PetscFunctionBegin; 330 for (i=0; i<m; i++) { 331 if (im[i] < 0) continue; 332 #if defined(PETSC_USE_DEBUG) 333 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 334 #endif 335 if (im[i] >= rstart && im[i] < rend) { 336 row = im[i] - rstart; 337 lastcol1 = -1; 338 rp1 = aj + ai[row]; 339 ap1 = aa + ai[row]; 340 rmax1 = aimax[row]; 341 nrow1 = ailen[row]; 342 low1 = 0; 343 high1 = nrow1; 344 lastcol2 = -1; 345 rp2 = bj + bi[row]; 346 ap2 = ba + bi[row]; 347 rmax2 = bimax[row]; 348 nrow2 = bilen[row]; 349 low2 = 0; 350 high2 = nrow2; 351 352 for (j=0; j<n; j++) { 353 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 354 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 355 if (in[j] >= cstart && in[j] < cend){ 356 col = in[j] - cstart; 357 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 358 } else if (in[j] < 0) continue; 359 #if defined(PETSC_USE_DEBUG) 360 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 361 #endif 362 else { 363 if (mat->was_assembled) { 364 if (!aij->colmap) { 365 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 366 } 367 #if defined (PETSC_USE_CTABLE) 368 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 369 col--; 370 #else 371 col = aij->colmap[in[j]] - 1; 372 #endif 373 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 374 ierr 
= DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 375 col = in[j]; 376 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 377 B = aij->B; 378 b = (Mat_SeqAIJ*)B->data; 379 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 380 rp2 = bj + bi[row]; 381 ap2 = ba + bi[row]; 382 rmax2 = bimax[row]; 383 nrow2 = bilen[row]; 384 low2 = 0; 385 high2 = nrow2; 386 bm = aij->B->rmap->n; 387 ba = b->a; 388 } 389 } else col = in[j]; 390 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 391 } 392 } 393 } else { 394 if (!aij->donotstash) { 395 if (roworiented) { 396 if (ignorezeroentries && v[i*n] == 0.0) continue; 397 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 398 } else { 399 if (ignorezeroentries && v[i] == 0.0) continue; 400 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 401 } 402 } 403 } 404 } 405 PetscFunctionReturn(0); 406 } 407 408 #undef __FUNCT__ 409 #define __FUNCT__ "MatGetValues_MPIAIJ" 410 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 411 { 412 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 413 PetscErrorCode ierr; 414 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 415 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 416 417 PetscFunctionBegin; 418 for (i=0; i<m; i++) { 419 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 420 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 421 if (idxm[i] >= rstart && idxm[i] < rend) { 422 row = idxm[i] - rstart; 423 for (j=0; j<n; j++) { 424 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 425 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 426 if (idxn[j] >= cstart && idxn[j] < cend){ 427 col = idxn[j] - cstart; 428 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 429 } else { 430 if (!aij->colmap) { 431 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 432 } 433 #if defined (PETSC_USE_CTABLE) 434 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 435 col --; 436 #else 437 col = aij->colmap[idxn[j]] - 1; 438 #endif 439 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 440 else { 441 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 442 } 443 } 444 } 445 } else { 446 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 447 } 448 } 449 PetscFunctionReturn(0); 450 } 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 455 { 456 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 457 PetscErrorCode ierr; 458 PetscInt nstash,reallocs; 459 InsertMode addv; 460 461 PetscFunctionBegin; 462 if (aij->donotstash) { 463 PetscFunctionReturn(0); 464 } 465 466 /* make sure all processors are either in INSERTMODE or ADDMODE */ 467 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 468 if (addv == (ADD_VALUES|INSERT_VALUES)) { 469 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 470 } 471 mat->insertmode = addv; /* in case this processor had no cache */ 472 473 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 474 ierr = 
MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap->range;
  PetscInt
*nprocs,j,idx,nsends,row; 568 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 569 PetscInt *rvalues,count,base,slen,*source; 570 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 571 MPI_Comm comm = ((PetscObject)A)->comm; 572 MPI_Request *send_waits,*recv_waits; 573 MPI_Status recv_status,*send_status; 574 #if defined(PETSC_DEBUG) 575 PetscTruth found = PETSC_FALSE; 576 #endif 577 578 PetscFunctionBegin; 579 /* first count number of contributors to each processor */ 580 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 581 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 582 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 583 j = 0; 584 for (i=0; i<N; i++) { 585 if (lastidx > (idx = rows[i])) j = 0; 586 lastidx = idx; 587 for (; j<size; j++) { 588 if (idx >= owners[j] && idx < owners[j+1]) { 589 nprocs[2*j]++; 590 nprocs[2*j+1] = 1; 591 owner[i] = j; 592 #if defined(PETSC_DEBUG) 593 found = PETSC_TRUE; 594 #endif 595 break; 596 } 597 } 598 #if defined(PETSC_DEBUG) 599 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 600 found = PETSC_FALSE; 601 #endif 602 } 603 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 604 605 /* inform other processors of number of messages and max length*/ 606 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 607 608 /* post receives: */ 609 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 610 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 611 for (i=0; i<nrecvs; i++) { 612 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 613 } 614 615 /* do sends: 616 1) starts[i] gives the starting index in svalues for stuff going to 617 the ith processor 618 */ 619 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 620 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 621 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 622 starts[0] = 0; 623 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 624 for (i=0; i<N; i++) { 625 svalues[starts[owner[i]]++] = rows[i]; 626 } 627 628 starts[0] = 0; 629 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 630 count = 0; 631 for (i=0; i<size; i++) { 632 if (nprocs[2*i+1]) { 633 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 634 } 635 } 636 ierr = PetscFree(starts);CHKERRQ(ierr); 637 638 base = owners[rank]; 639 640 /* wait on receives */ 641 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 642 source = lens + nrecvs; 643 count = nrecvs; slen = 0; 644 while (count) { 645 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 646 /* unpack receives into our local space */ 647 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 648 source[imdex] = recv_status.MPI_SOURCE; 649 lens[imdex] = n; 650 slen += n; 651 count--; 652 } 653 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 654 655 /* move the data into the send scatter */ 656 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 657 count = 0; 658 for (i=0; i<nrecvs; i++) { 659 values = rvalues + i*nmax; 660 for (j=0; j<lens[i]; j++) { 661 lrows[count++] = values[j] - base; 662 } 663 } 664 ierr = PetscFree(rvalues);CHKERRQ(ierr); 665 ierr = PetscFree(lens);CHKERRQ(ierr); 666 ierr = PetscFree(owner);CHKERRQ(ierr); 667 ierr = PetscFree(nprocs);CHKERRQ(ierr); 668 
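  /*
     At this point lrows[0..slen-1] holds the LOCAL indices of every row this process owns
     that some process asked to have zeroed.  A minimal caller-side sketch (hypothetical user
     code, not part of this file) - callers may pass any global row numbers, owned locally or
     not, and the exchange above routes each one to its owner:

        PetscInt    rows[2] = {0,5};    global row numbers to zero
        PetscScalar diag    = 1.0;      value placed on the zeroed diagonal entries
        ierr = MatZeroRows(A,2,rows,diag);CHKERRQ(ierr);
  */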
669 /* actually zap the local rows */ 670 /* 671 Zero the required rows. If the "diagonal block" of the matrix 672 is square and the user wishes to set the diagonal we use separate 673 code so that MatSetValues() is not called for each diagonal allocating 674 new memory, thus calling lots of mallocs and slowing things down. 675 676 Contributed by: Matthew Knepley 677 */ 678 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 679 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 680 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 681 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 682 } else if (diag != 0.0) { 683 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 684 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 685 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 686 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 687 } 688 for (i = 0; i < slen; i++) { 689 row = lrows[i] + rstart; 690 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 691 } 692 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 693 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 694 } else { 695 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 696 } 697 ierr = PetscFree(lrows);CHKERRQ(ierr); 698 699 /* wait on sends */ 700 if (nsends) { 701 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 702 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 703 ierr = PetscFree(send_status);CHKERRQ(ierr); 704 } 705 ierr = PetscFree(send_waits);CHKERRQ(ierr); 706 ierr = PetscFree(svalues);CHKERRQ(ierr); 707 708 PetscFunctionReturn(0); 709 } 710 711 #undef __FUNCT__ 712 #define __FUNCT__ "MatMult_MPIAIJ" 713 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 714 { 715 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 716 PetscErrorCode ierr; 717 PetscInt nt; 718 719 PetscFunctionBegin; 720 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 721 if (nt != A->cmap->n) { 722 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 723 } 724 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 725 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 726 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 727 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 728 PetscFunctionReturn(0); 729 } 730 731 #undef __FUNCT__ 732 #define __FUNCT__ "MatMultAdd_MPIAIJ" 733 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 734 { 735 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 736 PetscErrorCode ierr; 737 738 PetscFunctionBegin; 739 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 740 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 741 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 742 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 743 PetscFunctionReturn(0); 744 } 745 746 #undef __FUNCT__ 747 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 748 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 749 { 750 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 751 PetscErrorCode ierr; 752 PetscTruth merged; 753 754 PetscFunctionBegin; 755 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 756 /* do nondiagonal part */ 757 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 758 if (!merged) { 759 /* send it on 
its way */ 760 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 761 /* do local part */ 762 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 763 /* receive remote parts: note this assumes the values are not actually */ 764 /* added in yy until the next line, */ 765 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 766 } else { 767 /* do local part */ 768 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 769 /* send it on its way */ 770 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 771 /* values actually were received in the Begin() but we need to call this nop */ 772 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 773 } 774 PetscFunctionReturn(0); 775 } 776 777 EXTERN_C_BEGIN 778 #undef __FUNCT__ 779 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 780 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 781 { 782 MPI_Comm comm; 783 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 784 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 785 IS Me,Notme; 786 PetscErrorCode ierr; 787 PetscInt M,N,first,last,*notme,i; 788 PetscMPIInt size; 789 790 PetscFunctionBegin; 791 792 /* Easy test: symmetric diagonal block */ 793 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 794 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 795 if (!*f) PetscFunctionReturn(0); 796 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 797 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 798 if (size == 1) PetscFunctionReturn(0); 799 800 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 801 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 802 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 803 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 804 for (i=0; i<first; i++) notme[i] = i; 805 for (i=last; i<M; i++) notme[i-last+first] = i; 806 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 807 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 808 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 809 Aoff = Aoffs[0]; 810 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 811 Boff = Boffs[0]; 812 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 813 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 814 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 815 ierr = ISDestroy(Me);CHKERRQ(ierr); 816 ierr = ISDestroy(Notme);CHKERRQ(ierr); 817 818 PetscFunctionReturn(0); 819 } 820 EXTERN_C_END 821 822 #undef __FUNCT__ 823 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 824 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 825 { 826 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 827 PetscErrorCode ierr; 828 829 PetscFunctionBegin; 830 /* do nondiagonal part */ 831 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 832 /* send it on its way */ 833 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 834 /* do local part */ 835 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 836 /* receive remote parts */ 837 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 838 PetscFunctionReturn(0); 839 } 840 841 /* 842 This only works correctly for square matrices where the subblock A->A is the 843 diagonal block 844 */ 845 #undef 
__FUNCT__ 846 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 847 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 848 { 849 PetscErrorCode ierr; 850 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 851 852 PetscFunctionBegin; 853 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 854 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) { 855 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 856 } 857 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 858 PetscFunctionReturn(0); 859 } 860 861 #undef __FUNCT__ 862 #define __FUNCT__ "MatScale_MPIAIJ" 863 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 864 { 865 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 866 PetscErrorCode ierr; 867 868 PetscFunctionBegin; 869 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 870 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 871 PetscFunctionReturn(0); 872 } 873 874 #undef __FUNCT__ 875 #define __FUNCT__ "MatDestroy_MPIAIJ" 876 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 877 { 878 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 879 PetscErrorCode ierr; 880 881 PetscFunctionBegin; 882 #if defined(PETSC_USE_LOG) 883 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 884 #endif 885 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 886 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 887 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 888 #if defined (PETSC_USE_CTABLE) 889 if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);} 890 #else 891 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 892 #endif 893 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 894 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 895 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 896 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 897 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 898 ierr = PetscFree(aij);CHKERRQ(ierr); 899 900 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 901 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 902 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 903 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 904 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 905 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 906 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 907 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 908 PetscFunctionReturn(0); 909 } 910 911 #undef __FUNCT__ 912 #define __FUNCT__ "MatView_MPIAIJ_Binary" 913 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 914 { 915 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 916 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 917 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 918 PetscErrorCode ierr; 919 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 920 int fd; 921 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 922 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 923 PetscScalar *column_values; 924 925 PetscFunctionBegin; 926 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 927 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 928 nz = 
A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (;
j<B->i[i+1]; j++) { 1014 column_values[cnt++] = B->a[j]; 1015 } 1016 } 1017 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1018 1019 /* store the column values to the file */ 1020 if (!rank) { 1021 MPI_Status status; 1022 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1023 for (i=1; i<size; i++) { 1024 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1025 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1026 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1027 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1028 } 1029 } else { 1030 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1031 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1032 } 1033 ierr = PetscFree(column_values);CHKERRQ(ierr); 1034 PetscFunctionReturn(0); 1035 } 1036 1037 #undef __FUNCT__ 1038 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1039 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1040 { 1041 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1042 PetscErrorCode ierr; 1043 PetscMPIInt rank = aij->rank,size = aij->size; 1044 PetscTruth isdraw,iascii,isbinary; 1045 PetscViewer sviewer; 1046 PetscViewerFormat format; 1047 1048 PetscFunctionBegin; 1049 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1050 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1051 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1052 if (iascii) { 1053 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1054 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1055 MatInfo info; 1056 PetscTruth inodes; 1057 1058 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1059 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1060 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1061 if (!inodes) { 1062 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1063 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1064 } else { 1065 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1066 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1067 } 1068 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1069 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1070 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1071 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1072 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1073 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1074 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1075 PetscFunctionReturn(0); 1076 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1077 PetscInt inodecount,inodelimit,*inodes; 1078 ierr = 
MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1079 if (inodes) { 1080 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1081 } else { 1082 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1083 } 1084 PetscFunctionReturn(0); 1085 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1086 PetscFunctionReturn(0); 1087 } 1088 } else if (isbinary) { 1089 if (size == 1) { 1090 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1091 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1092 } else { 1093 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1094 } 1095 PetscFunctionReturn(0); 1096 } else if (isdraw) { 1097 PetscDraw draw; 1098 PetscTruth isnull; 1099 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1100 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1101 } 1102 1103 if (size == 1) { 1104 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1105 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1106 } else { 1107 /* assemble the entire matrix onto first processor. */ 1108 Mat A; 1109 Mat_SeqAIJ *Aloc; 1110 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1111 MatScalar *a; 1112 1113 if (mat->rmap->N > 1024) { 1114 PetscTruth flg; 1115 1116 ierr = PetscOptionsHasName(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg);CHKERRQ(ierr); 1117 if (!flg) { 1118 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 512 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1119 } 1120 } 1121 1122 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1123 if (!rank) { 1124 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1125 } else { 1126 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1127 } 1128 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1129 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1130 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1131 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1132 1133 /* copy over the A part */ 1134 Aloc = (Mat_SeqAIJ*)aij->A->data; 1135 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1136 row = mat->rmap->rstart; 1137 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1138 for (i=0; i<m; i++) { 1139 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1140 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1141 } 1142 aj = Aloc->j; 1143 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1144 1145 /* copy over the B part */ 1146 Aloc = (Mat_SeqAIJ*)aij->B->data; 1147 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1148 row = mat->rmap->rstart; 1149 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1150 ct = cols; 1151 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1152 for (i=0; i<m; i++) { 1153 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1154 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1155 } 1156 ierr = PetscFree(ct);CHKERRQ(ierr); 1157 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1158 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1159 /* 1160 Everyone has to call to draw the matrix since the graphics waits are 1161 
synchronized across all processors that share the PetscDraw object 1162 */ 1163 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1164 if (!rank) { 1165 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1166 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1167 } 1168 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1169 ierr = MatDestroy(A);CHKERRQ(ierr); 1170 } 1171 PetscFunctionReturn(0); 1172 } 1173 1174 #undef __FUNCT__ 1175 #define __FUNCT__ "MatView_MPIAIJ" 1176 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1177 { 1178 PetscErrorCode ierr; 1179 PetscTruth iascii,isdraw,issocket,isbinary; 1180 1181 PetscFunctionBegin; 1182 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1183 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1184 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1185 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1186 if (iascii || isdraw || isbinary || issocket) { 1187 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1188 } else { 1189 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1190 } 1191 PetscFunctionReturn(0); 1192 } 1193 1194 #undef __FUNCT__ 1195 #define __FUNCT__ "MatRelax_MPIAIJ" 1196 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1197 { 1198 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1199 PetscErrorCode ierr; 1200 Vec bb1; 1201 1202 PetscFunctionBegin; 1203 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1204 1205 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1206 if (flag & SOR_ZERO_INITIAL_GUESS) { 1207 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1208 its--; 1209 } 1210 1211 while (its--) { 1212 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1213 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1214 1215 /* update rhs: bb1 = bb - B*x */ 1216 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1217 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1218 1219 /* local sweep */ 1220 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 1221 } 1222 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1223 if (flag & SOR_ZERO_INITIAL_GUESS) { 1224 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1225 its--; 1226 } 1227 while (its--) { 1228 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1229 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1230 1231 /* update rhs: bb1 = bb - B*x */ 1232 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1233 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1234 1235 /* local sweep */ 1236 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1237 } 1238 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1239 if (flag & SOR_ZERO_INITIAL_GUESS) { 1240 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1241 its--; 1242 } 1243 while (its--) { 1244 ierr = 
VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1245 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1246 1247 /* update rhs: bb1 = bb - B*x */ 1248 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1249 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1250 1251 /* local sweep */ 1252 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1253 } 1254 } else { 1255 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1256 } 1257 1258 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1259 PetscFunctionReturn(0); 1260 } 1261 1262 #undef __FUNCT__ 1263 #define __FUNCT__ "MatPermute_MPIAIJ" 1264 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1265 { 1266 MPI_Comm comm,pcomm; 1267 PetscInt first,local_size,nrows; 1268 const PetscInt *rows; 1269 PetscMPIInt size; 1270 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1271 PetscErrorCode ierr; 1272 1273 PetscFunctionBegin; 1274 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1275 /* make a collective version of 'rowp' */ 1276 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1277 if (pcomm==comm) { 1278 crowp = rowp; 1279 } else { 1280 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1281 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1282 ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr); 1283 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1284 } 1285 /* collect the global row permutation and invert it */ 1286 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1287 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1288 if (pcomm!=comm) { 1289 ierr = ISDestroy(crowp);CHKERRQ(ierr); 1290 } 1291 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1292 /* get the local target indices */ 1293 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1294 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 1295 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1296 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr); 1297 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1298 ierr = ISDestroy(irowp);CHKERRQ(ierr); 1299 /* the column permutation is so much easier; 1300 make a local version of 'colp' and invert it */ 1301 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 1302 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 1303 if (size==1) { 1304 lcolp = colp; 1305 } else { 1306 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 1307 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 1308 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr); 1309 } 1310 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1311 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1312 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1313 if (size>1) { 1314 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 1315 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 1316 } 1317 /* now we just get the submatrix */ 1318 ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1319 /* clean up */ 1320 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 1321 ierr = ISDestroy(icolp);CHKERRQ(ierr); 1322 PetscFunctionReturn(0); 1323 } 1324 1325 #undef __FUNCT__ 1326 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1327 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1328 { 1329 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1330 Mat A = mat->A,B = mat->B; 1331 PetscErrorCode ierr; 1332 PetscReal 
isend[5],irecv[5]; 1333 1334 PetscFunctionBegin; 1335 info->block_size = 1.0; 1336 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1337 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1338 isend[3] = info->memory; isend[4] = info->mallocs; 1339 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1340 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1341 isend[3] += info->memory; isend[4] += info->mallocs; 1342 if (flag == MAT_LOCAL) { 1343 info->nz_used = isend[0]; 1344 info->nz_allocated = isend[1]; 1345 info->nz_unneeded = isend[2]; 1346 info->memory = isend[3]; 1347 info->mallocs = isend[4]; 1348 } else if (flag == MAT_GLOBAL_MAX) { 1349 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1350 info->nz_used = irecv[0]; 1351 info->nz_allocated = irecv[1]; 1352 info->nz_unneeded = irecv[2]; 1353 info->memory = irecv[3]; 1354 info->mallocs = irecv[4]; 1355 } else if (flag == MAT_GLOBAL_SUM) { 1356 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1357 info->nz_used = irecv[0]; 1358 info->nz_allocated = irecv[1]; 1359 info->nz_unneeded = irecv[2]; 1360 info->memory = irecv[3]; 1361 info->mallocs = irecv[4]; 1362 } 1363 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1364 info->fill_ratio_needed = 0; 1365 info->factor_mallocs = 0; 1366 1367 PetscFunctionReturn(0); 1368 } 1369 1370 #undef __FUNCT__ 1371 #define __FUNCT__ "MatSetOption_MPIAIJ" 1372 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg) 1373 { 1374 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1375 PetscErrorCode ierr; 1376 1377 PetscFunctionBegin; 1378 switch (op) { 1379 case MAT_NEW_NONZERO_LOCATIONS: 1380 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1381 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1382 case MAT_KEEP_ZEROED_ROWS: 1383 case MAT_NEW_NONZERO_LOCATION_ERR: 1384 case MAT_USE_INODES: 1385 case MAT_IGNORE_ZERO_ENTRIES: 1386 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1387 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1388 break; 1389 case MAT_ROW_ORIENTED: 1390 a->roworiented = flg; 1391 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1392 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1393 break; 1394 case MAT_NEW_DIAGONALS: 1395 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1396 break; 1397 case MAT_IGNORE_OFF_PROC_ENTRIES: 1398 a->donotstash = PETSC_TRUE; 1399 break; 1400 case MAT_SYMMETRIC: 1401 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1402 break; 1403 case MAT_STRUCTURALLY_SYMMETRIC: 1404 case MAT_HERMITIAN: 1405 case MAT_SYMMETRY_ETERNAL: 1406 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1407 break; 1408 default: 1409 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1410 } 1411 PetscFunctionReturn(0); 1412 } 1413 1414 #undef __FUNCT__ 1415 #define __FUNCT__ "MatGetRow_MPIAIJ" 1416 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1417 { 1418 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1419 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1420 PetscErrorCode ierr; 1421 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1422 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1423 PetscInt *cmap,*idx_p; 1424 1425 PetscFunctionBegin; 1426 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1427 mat->getrowactive = PETSC_TRUE; 1428 1429 if (!mat->rowvalues && (idx || v)) { 1430 
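    /*
       This workspace is created lazily on the first MatGetRow() call that needs it, reused by
       subsequent calls, and released again in MatAssemblyEnd_MPIAIJ() and MatDestroy_MPIAIJ().
    */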
/* 1431 allocate enough space to hold information from the longest row. 1432 */ 1433 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1434 PetscInt max = 1,tmp; 1435 for (i=0; i<matin->rmap->n; i++) { 1436 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1437 if (max < tmp) { max = tmp; } 1438 } 1439 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1440 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1441 } 1442 1443 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1444 lrow = row - rstart; 1445 1446 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1447 if (!v) {pvA = 0; pvB = 0;} 1448 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1449 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1450 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1451 nztot = nzA + nzB; 1452 1453 cmap = mat->garray; 1454 if (v || idx) { 1455 if (nztot) { 1456 /* Sort by increasing column numbers, assuming A and B already sorted */ 1457 PetscInt imark = -1; 1458 if (v) { 1459 *v = v_p = mat->rowvalues; 1460 for (i=0; i<nzB; i++) { 1461 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1462 else break; 1463 } 1464 imark = i; 1465 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1466 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1467 } 1468 if (idx) { 1469 *idx = idx_p = mat->rowindices; 1470 if (imark > -1) { 1471 for (i=0; i<imark; i++) { 1472 idx_p[i] = cmap[cworkB[i]]; 1473 } 1474 } else { 1475 for (i=0; i<nzB; i++) { 1476 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1477 else break; 1478 } 1479 imark = i; 1480 } 1481 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1482 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1483 } 1484 } else { 1485 if (idx) *idx = 0; 1486 if (v) *v = 0; 1487 } 1488 } 1489 *nz = nztot; 1490 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1491 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1492 PetscFunctionReturn(0); 1493 } 1494 1495 #undef __FUNCT__ 1496 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1497 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1498 { 1499 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1500 1501 PetscFunctionBegin; 1502 if (!aij->getrowactive) { 1503 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1504 } 1505 aij->getrowactive = PETSC_FALSE; 1506 PetscFunctionReturn(0); 1507 } 1508 1509 #undef __FUNCT__ 1510 #define __FUNCT__ "MatNorm_MPIAIJ" 1511 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1512 { 1513 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1514 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1515 PetscErrorCode ierr; 1516 PetscInt i,j,cstart = mat->cmap->rstart; 1517 PetscReal sum = 0.0; 1518 MatScalar *v; 1519 1520 PetscFunctionBegin; 1521 if (aij->size == 1) { 1522 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1523 } else { 1524 if (type == NORM_FROBENIUS) { 1525 v = amat->a; 1526 for (i=0; i<amat->nz; i++) { 1527 #if defined(PETSC_USE_COMPLEX) 1528 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1529 #else 1530 sum += (*v)*(*v); v++; 1531 #endif 1532 } 1533 v = bmat->a; 1534 for (i=0; i<bmat->nz; i++) { 1535 #if defined(PETSC_USE_COMPLEX) 1536 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1537 #else 1538 sum += (*v)*(*v); v++; 1539 #endif 1540 } 1541 ierr = 
MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1542 *norm = sqrt(*norm); 1543 } else if (type == NORM_1) { /* max column norm */ 1544 PetscReal *tmp,*tmp2; 1545 PetscInt *jj,*garray = aij->garray; 1546 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1547 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1548 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1549 *norm = 0.0; 1550 v = amat->a; jj = amat->j; 1551 for (j=0; j<amat->nz; j++) { 1552 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1553 } 1554 v = bmat->a; jj = bmat->j; 1555 for (j=0; j<bmat->nz; j++) { 1556 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1557 } 1558 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1559 for (j=0; j<mat->cmap->N; j++) { 1560 if (tmp2[j] > *norm) *norm = tmp2[j]; 1561 } 1562 ierr = PetscFree(tmp);CHKERRQ(ierr); 1563 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1564 } else if (type == NORM_INFINITY) { /* max row norm */ 1565 PetscReal ntemp = 0.0; 1566 for (j=0; j<aij->A->rmap->n; j++) { 1567 v = amat->a + amat->i[j]; 1568 sum = 0.0; 1569 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1570 sum += PetscAbsScalar(*v); v++; 1571 } 1572 v = bmat->a + bmat->i[j]; 1573 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1574 sum += PetscAbsScalar(*v); v++; 1575 } 1576 if (sum > ntemp) ntemp = sum; 1577 } 1578 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1579 } else { 1580 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1581 } 1582 } 1583 PetscFunctionReturn(0); 1584 } 1585 1586 #undef __FUNCT__ 1587 #define __FUNCT__ "MatTranspose_MPIAIJ" 1588 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1589 { 1590 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1591 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1592 PetscErrorCode ierr; 1593 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1594 PetscInt cstart=A->cmap->rstart,ncol; 1595 Mat B; 1596 MatScalar *array; 1597 1598 PetscFunctionBegin; 1599 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1600 1601 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1602 ai = Aloc->i; aj = Aloc->j; 1603 bi = Bloc->i; bj = Bloc->j; 1604 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1605 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1606 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1607 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1608 for (i=0; i<ai[ma]; i++){ 1609 d_nnz[aj[i]] ++; 1610 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1611 } 1612 1613 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1614 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1615 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1616 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1617 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1618 } else { 1619 B = *matout; 1620 } 1621 1622 /* copy over the A part */ 1623 array = Aloc->a; 1624 row = A->rmap->rstart; 1625 for (i=0; i<ma; i++) { 1626 ncol = ai[i+1]-ai[i]; 1627 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1628 row++; array += ncol; aj += ncol; 1629 } 1630 aj = Aloc->j; 1631 for (i=0; i<ai[ma]; i++) aj[i] -= 
cstart; /* resume local col index */ 1632 1633 /* copy over the B part */ 1634 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1635 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1636 array = Bloc->a; 1637 row = A->rmap->rstart; 1638 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1639 cols_tmp = cols; 1640 for (i=0; i<mb; i++) { 1641 ncol = bi[i+1]-bi[i]; 1642 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1643 row++; array += ncol; cols_tmp += ncol; 1644 } 1645 ierr = PetscFree(cols);CHKERRQ(ierr); 1646 1647 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1648 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1649 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1650 *matout = B; 1651 } else { 1652 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1653 } 1654 PetscFunctionReturn(0); 1655 } 1656 1657 #undef __FUNCT__ 1658 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1659 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1660 { 1661 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1662 Mat a = aij->A,b = aij->B; 1663 PetscErrorCode ierr; 1664 PetscInt s1,s2,s3; 1665 1666 PetscFunctionBegin; 1667 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1668 if (rr) { 1669 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1670 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1671 /* Overlap communication with computation. */ 1672 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1673 } 1674 if (ll) { 1675 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1676 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1677 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1678 } 1679 /* scale the diagonal block */ 1680 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1681 1682 if (rr) { 1683 /* Do a scatter end and then right scale the off-diagonal block */ 1684 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1685 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1686 } 1687 1688 PetscFunctionReturn(0); 1689 } 1690 1691 #undef __FUNCT__ 1692 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1693 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1694 { 1695 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1696 PetscErrorCode ierr; 1697 1698 PetscFunctionBegin; 1699 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1700 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1701 PetscFunctionReturn(0); 1702 } 1703 #undef __FUNCT__ 1704 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1705 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1706 { 1707 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1708 PetscErrorCode ierr; 1709 1710 PetscFunctionBegin; 1711 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1712 PetscFunctionReturn(0); 1713 } 1714 1715 #undef __FUNCT__ 1716 #define __FUNCT__ "MatEqual_MPIAIJ" 1717 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1718 { 1719 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1720 Mat a,b,c,d; 1721 PetscTruth flg; 1722 PetscErrorCode ierr; 1723 1724 PetscFunctionBegin; 1725 a = matA->A; b = matA->B; 1726 c = matB->A; d = matB->B; 1727 1728 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1729 if (flg) { 1730 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1731 } 1732 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1733 PetscFunctionReturn(0); 1734 } 1735 1736 #undef __FUNCT__ 1737 #define __FUNCT__ 
"MatCopy_MPIAIJ" 1738 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1739 { 1740 PetscErrorCode ierr; 1741 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1742 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1743 1744 PetscFunctionBegin; 1745 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1746 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1747 /* because of the column compression in the off-processor part of the matrix a->B, 1748 the number of columns in a->B and b->B may be different, hence we cannot call 1749 the MatCopy() directly on the two parts. If need be, we can provide a more 1750 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1751 then copying the submatrices */ 1752 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1753 } else { 1754 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1755 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1756 } 1757 PetscFunctionReturn(0); 1758 } 1759 1760 #undef __FUNCT__ 1761 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1762 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1763 { 1764 PetscErrorCode ierr; 1765 1766 PetscFunctionBegin; 1767 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1768 PetscFunctionReturn(0); 1769 } 1770 1771 #include "petscblaslapack.h" 1772 #undef __FUNCT__ 1773 #define __FUNCT__ "MatAXPY_MPIAIJ" 1774 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1775 { 1776 PetscErrorCode ierr; 1777 PetscInt i; 1778 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1779 PetscBLASInt bnz,one=1; 1780 Mat_SeqAIJ *x,*y; 1781 1782 PetscFunctionBegin; 1783 if (str == SAME_NONZERO_PATTERN) { 1784 PetscScalar alpha = a; 1785 x = (Mat_SeqAIJ *)xx->A->data; 1786 y = (Mat_SeqAIJ *)yy->A->data; 1787 bnz = PetscBLASIntCast(x->nz); 1788 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1789 x = (Mat_SeqAIJ *)xx->B->data; 1790 y = (Mat_SeqAIJ *)yy->B->data; 1791 bnz = PetscBLASIntCast(x->nz); 1792 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1793 } else if (str == SUBSET_NONZERO_PATTERN) { 1794 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1795 1796 x = (Mat_SeqAIJ *)xx->B->data; 1797 y = (Mat_SeqAIJ *)yy->B->data; 1798 if (y->xtoy && y->XtoY != xx->B) { 1799 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1800 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1801 } 1802 if (!y->xtoy) { /* get xtoy */ 1803 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1804 y->XtoY = xx->B; 1805 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1806 } 1807 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1808 } else { 1809 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1810 } 1811 PetscFunctionReturn(0); 1812 } 1813 1814 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1815 1816 #undef __FUNCT__ 1817 #define __FUNCT__ "MatConjugate_MPIAIJ" 1818 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1819 { 1820 #if defined(PETSC_USE_COMPLEX) 1821 PetscErrorCode ierr; 1822 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1823 1824 PetscFunctionBegin; 1825 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1826 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1827 #else 1828 PetscFunctionBegin; 1829 #endif 1830 PetscFunctionReturn(0); 1831 } 1832 1833 #undef __FUNCT__ 1834 #define __FUNCT__ "MatRealPart_MPIAIJ" 1835 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1836 { 1837 Mat_MPIAIJ *a = 
(Mat_MPIAIJ*)A->data;
 1838 PetscErrorCode ierr;
 1839
 1840 PetscFunctionBegin;
 1841 ierr = MatRealPart(a->A);CHKERRQ(ierr);
 1842 ierr = MatRealPart(a->B);CHKERRQ(ierr);
 1843 PetscFunctionReturn(0);
 1844 }
 1845
 1846 #undef __FUNCT__
 1847 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
 1848 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
 1849 {
 1850 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
 1851 PetscErrorCode ierr;
 1852
 1853 PetscFunctionBegin;
 1854 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
 1855 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
 1856 PetscFunctionReturn(0);
 1857 }
 1858
 1859 #ifdef PETSC_HAVE_PBGL
 1860
 1861 #include <boost/parallel/mpi/bsp_process_group.hpp>
 1862 #include <boost/graph/distributed/ilu_default_graph.hpp>
 1863 #include <boost/graph/distributed/ilu_0_block.hpp>
 1864 #include <boost/graph/distributed/ilu_preconditioner.hpp>
 1865 #include <boost/graph/distributed/petsc/interface.hpp>
 1866 #include <boost/multi_array.hpp>
 1867 #include <boost/parallel/distributed_property_map.hpp>
 1868
 1869 #undef __FUNCT__
 1870 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
 1871 /*
 1872 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
 1873 */
 1874 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
 1875 {
 1876 namespace petsc = boost::distributed::petsc;
 1877
 1878 namespace graph_dist = boost::graph::distributed;
 1879 using boost::graph::distributed::ilu_default::process_group_type;
 1880 using boost::graph::ilu_permuted;
 1881
 1882 PetscTruth row_identity, col_identity;
 1883 PetscContainer c;
 1884 PetscInt m, n, M, N;
 1885 PetscErrorCode ierr;
 1886
 1887 PetscFunctionBegin;
 1888 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
 1889 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
 1890 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
 1891 if (!row_identity || !col_identity) {
 1892 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
 1893 }
 1894
 1895 process_group_type pg;
 1896 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
 1897 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
 1898 lgraph_type& level_graph = *lgraph_p;
 1899 graph_dist::ilu_default::graph_type& graph(level_graph.graph);
 1900
 1901 petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
 1902 ilu_permuted(level_graph);
 1903
 1904 /* put together the new matrix */
 1905 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
 1906 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
 1907 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
 1908 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
 1909 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
 1910 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
 1911 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
 1912
 1913 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
 1914 ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
 1915 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
 1916 PetscFunctionReturn(0);
 1917 }
 1918
 1919 #undef __FUNCT__
 1920 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
 1921 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
 1922 {
 1923 PetscFunctionBegin;
 1924 PetscFunctionReturn(0);
 1925 }
 1926
 1927 #undef __FUNCT__
 1928 #define __FUNCT__ "MatSolve_MPIAIJ"
 1929 /*
 1930 This uses the parallel ILU
factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1931 */ 1932 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1933 { 1934 namespace graph_dist = boost::graph::distributed; 1935 1936 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1937 lgraph_type* lgraph_p; 1938 PetscContainer c; 1939 PetscErrorCode ierr; 1940 1941 PetscFunctionBegin; 1942 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1943 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1944 ierr = VecCopy(b, x);CHKERRQ(ierr); 1945 1946 PetscScalar* array_x; 1947 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1948 PetscInt sx; 1949 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 1950 1951 PetscScalar* array_b; 1952 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 1953 PetscInt sb; 1954 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 1955 1956 lgraph_type& level_graph = *lgraph_p; 1957 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1958 1959 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 1960 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 1961 ref_x(array_x, boost::extents[num_vertices(graph)]); 1962 1963 typedef boost::iterator_property_map<array_ref_type::iterator, 1964 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 1965 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 1966 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 1967 1968 ilu_set_solve(*lgraph_p, vector_b, vector_x); 1969 1970 PetscFunctionReturn(0); 1971 } 1972 #endif 1973 1974 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 1975 PetscInt nzlocal,nsends,nrecvs; 1976 PetscMPIInt *send_rank; 1977 PetscInt *sbuf_nz,*sbuf_j,**rbuf_j; 1978 PetscScalar *sbuf_a,**rbuf_a; 1979 PetscErrorCode (*MatDestroy)(Mat); 1980 } Mat_Redundant; 1981 1982 #undef __FUNCT__ 1983 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 1984 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 1985 { 1986 PetscErrorCode ierr; 1987 Mat_Redundant *redund=(Mat_Redundant*)ptr; 1988 PetscInt i; 1989 1990 PetscFunctionBegin; 1991 ierr = PetscFree(redund->send_rank);CHKERRQ(ierr); 1992 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 1993 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 1994 for (i=0; i<redund->nrecvs; i++){ 1995 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 1996 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 1997 } 1998 ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 1999 ierr = PetscFree(redund);CHKERRQ(ierr); 2000 PetscFunctionReturn(0); 2001 } 2002 2003 #undef __FUNCT__ 2004 #define __FUNCT__ "MatDestroy_MatRedundant" 2005 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2006 { 2007 PetscErrorCode ierr; 2008 PetscContainer container; 2009 Mat_Redundant *redund=PETSC_NULL; 2010 2011 PetscFunctionBegin; 2012 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2013 if (container) { 2014 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2015 } else { 2016 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2017 } 2018 A->ops->destroy = redund->MatDestroy; 2019 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2020 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2021 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 2022 PetscFunctionReturn(0); 2023 } 2024 2025 #undef __FUNCT__ 2026 #define __FUNCT__ 
"MatGetRedundantMatrix_MPIAIJ" 2027 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2028 { 2029 PetscMPIInt rank,size; 2030 MPI_Comm comm=((PetscObject)mat)->comm; 2031 PetscErrorCode ierr; 2032 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2033 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2034 PetscInt *rowrange=mat->rmap->range; 2035 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2036 Mat A=aij->A,B=aij->B,C=*matredundant; 2037 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2038 PetscScalar *sbuf_a; 2039 PetscInt nzlocal=a->nz+b->nz; 2040 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2041 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2042 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2043 MatScalar *aworkA,*aworkB; 2044 PetscScalar *vals; 2045 PetscMPIInt tag1,tag2,tag3,imdex; 2046 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2047 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2048 MPI_Status recv_status,*send_status; 2049 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2050 PetscInt **rbuf_j=PETSC_NULL; 2051 PetscScalar **rbuf_a=PETSC_NULL; 2052 Mat_Redundant *redund=PETSC_NULL; 2053 PetscContainer container; 2054 2055 PetscFunctionBegin; 2056 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2057 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2058 2059 if (reuse == MAT_REUSE_MATRIX) { 2060 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2061 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2062 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2063 if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2064 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2065 if (container) { 2066 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2067 } else { 2068 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2069 } 2070 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2071 2072 nsends = redund->nsends; 2073 nrecvs = redund->nrecvs; 2074 send_rank = redund->send_rank; recv_rank = send_rank + size; 2075 sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends; 2076 sbuf_j = redund->sbuf_j; 2077 sbuf_a = redund->sbuf_a; 2078 rbuf_j = redund->rbuf_j; 2079 rbuf_a = redund->rbuf_a; 2080 } 2081 2082 if (reuse == MAT_INITIAL_MATRIX){ 2083 PetscMPIInt subrank,subsize; 2084 PetscInt nleftover,np_subcomm; 2085 /* get the destination processors' id send_rank, nsends and nrecvs */ 2086 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2087 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2088 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 2089 recv_rank = send_rank + size; 2090 np_subcomm = size/nsubcomm; 2091 nleftover = size - nsubcomm*np_subcomm; 2092 nsends = 0; nrecvs = 0; 2093 for (i=0; i<size; i++){ /* i=rank*/ 2094 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2095 send_rank[nsends] = i; nsends++; 2096 recv_rank[nrecvs++] = i; 2097 } 2098 } 2099 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2100 i = size-nleftover-1; 2101 j = 0; 2102 while (j < nsubcomm - nleftover){ 2103 send_rank[nsends++] = i; 2104 i--; j++; 2105 } 2106 } 2107 2108 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2109 for (i=0; i<nleftover; i++){ 2110 recv_rank[nrecvs++] = size-nleftover+i; 2111 } 2112 } 2113 2114 /* allocate sbuf_j, sbuf_a */ 2115 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2116 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2117 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2118 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2119 2120 /* copy mat's local entries into the buffers */ 2121 if (reuse == MAT_INITIAL_MATRIX){ 2122 rownz_max = 0; 2123 rptr = sbuf_j; 2124 cols = sbuf_j + rend-rstart + 1; 2125 vals = sbuf_a; 2126 rptr[0] = 0; 2127 for (i=0; i<rend-rstart; i++){ 2128 row = i + rstart; 2129 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2130 ncols = nzA + nzB; 2131 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2132 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2133 /* load the column indices for this row into cols */ 2134 lwrite = 0; 2135 for (l=0; l<nzB; l++) { 2136 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2137 vals[lwrite] = aworkB[l]; 2138 cols[lwrite++] = ctmp; 2139 } 2140 } 2141 for (l=0; l<nzA; l++){ 2142 vals[lwrite] = aworkA[l]; 2143 cols[lwrite++] = cstart + cworkA[l]; 2144 } 2145 for (l=0; l<nzB; l++) { 2146 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2147 vals[lwrite] = aworkB[l]; 2148 cols[lwrite++] = ctmp; 2149 } 2150 } 2151 vals += ncols; 2152 cols += ncols; 2153 rptr[i+1] = rptr[i] + ncols; 2154 if (rownz_max < ncols) rownz_max = ncols; 2155 } 2156 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2157 } else { /* only copy matrix values into sbuf_a */ 2158 rptr = sbuf_j; 2159 vals = sbuf_a; 2160 rptr[0] = 0; 2161 for (i=0; i<rend-rstart; i++){ 2162 row = i + rstart; 2163 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2164 ncols = nzA + nzB; 2165 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2166 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2167 lwrite = 0; 2168 for (l=0; l<nzB; l++) { 2169 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2170 } 2171 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2172 for (l=0; l<nzB; l++) { 
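        /* third pass over this row: off-diagonal entries whose global column index lies at or
           beyond the local column range, so the values land in the same lower/diagonal/upper
           column order used when sbuf_j was built for MAT_INITIAL_MATRIX */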
2173 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2174 } 2175 vals += ncols; 2176 rptr[i+1] = rptr[i] + ncols; 2177 } 2178 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2179 2180 /* send nzlocal to others, and recv other's nzlocal */ 2181 /*--------------------------------------------------*/ 2182 if (reuse == MAT_INITIAL_MATRIX){ 2183 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2184 s_waits2 = s_waits3 + nsends; 2185 s_waits1 = s_waits2 + nsends; 2186 r_waits1 = s_waits1 + nsends; 2187 r_waits2 = r_waits1 + nrecvs; 2188 r_waits3 = r_waits2 + nrecvs; 2189 } else { 2190 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2191 r_waits3 = s_waits3 + nsends; 2192 } 2193 2194 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2195 if (reuse == MAT_INITIAL_MATRIX){ 2196 /* get new tags to keep the communication clean */ 2197 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2198 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2199 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2200 rbuf_nz = sbuf_nz + nsends; 2201 2202 /* post receives of other's nzlocal */ 2203 for (i=0; i<nrecvs; i++){ 2204 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2205 } 2206 /* send nzlocal to others */ 2207 for (i=0; i<nsends; i++){ 2208 sbuf_nz[i] = nzlocal; 2209 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2210 } 2211 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2212 count = nrecvs; 2213 while (count) { 2214 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2215 recv_rank[imdex] = recv_status.MPI_SOURCE; 2216 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2217 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2218 2219 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2220 rbuf_nz[imdex] += i + 2; 2221 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2222 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2223 count--; 2224 } 2225 /* wait on sends of nzlocal */ 2226 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2227 /* send mat->i,j to others, and recv from other's */ 2228 /*------------------------------------------------*/ 2229 for (i=0; i<nsends; i++){ 2230 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2231 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2232 } 2233 /* wait on receives of mat->i,j */ 2234 /*------------------------------*/ 2235 count = nrecvs; 2236 while (count) { 2237 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2238 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2239 count--; 2240 } 2241 /* wait on sends of mat->i,j */ 2242 /*---------------------------*/ 2243 if (nsends) { 2244 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2245 } 2246 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2247 2248 /* post receives, send and receive mat->a */ 2249 /*----------------------------------------*/ 
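  /* Unlike the row-pointer/column-index exchange above, which is done only for
     MAT_INITIAL_MATRIX, the numerical values in sbuf_a are exchanged on every call:
     a reused redundant matrix keeps its nonzero structure but needs fresh values. */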
2250 for (imdex=0; imdex<nrecvs; imdex++) { 2251 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2252 } 2253 for (i=0; i<nsends; i++){ 2254 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2255 } 2256 count = nrecvs; 2257 while (count) { 2258 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2259 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2260 count--; 2261 } 2262 if (nsends) { 2263 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2264 } 2265 2266 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2267 2268 /* create redundant matrix */ 2269 /*-------------------------*/ 2270 if (reuse == MAT_INITIAL_MATRIX){ 2271 /* compute rownz_max for preallocation */ 2272 for (imdex=0; imdex<nrecvs; imdex++){ 2273 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2274 rptr = rbuf_j[imdex]; 2275 for (i=0; i<j; i++){ 2276 ncols = rptr[i+1] - rptr[i]; 2277 if (rownz_max < ncols) rownz_max = ncols; 2278 } 2279 } 2280 2281 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2282 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2283 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2284 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2285 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2286 } else { 2287 C = *matredundant; 2288 } 2289 2290 /* insert local matrix entries */ 2291 rptr = sbuf_j; 2292 cols = sbuf_j + rend-rstart + 1; 2293 vals = sbuf_a; 2294 for (i=0; i<rend-rstart; i++){ 2295 row = i + rstart; 2296 ncols = rptr[i+1] - rptr[i]; 2297 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2298 vals += ncols; 2299 cols += ncols; 2300 } 2301 /* insert received matrix entries */ 2302 for (imdex=0; imdex<nrecvs; imdex++){ 2303 rstart = rowrange[recv_rank[imdex]]; 2304 rend = rowrange[recv_rank[imdex]+1]; 2305 rptr = rbuf_j[imdex]; 2306 cols = rbuf_j[imdex] + rend-rstart + 1; 2307 vals = rbuf_a[imdex]; 2308 for (i=0; i<rend-rstart; i++){ 2309 row = i + rstart; 2310 ncols = rptr[i+1] - rptr[i]; 2311 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2312 vals += ncols; 2313 cols += ncols; 2314 } 2315 } 2316 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2317 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2318 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2319 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2320 if (reuse == MAT_INITIAL_MATRIX){ 2321 PetscContainer container; 2322 *matredundant = C; 2323 /* create a supporting struct and attach it to C for reuse */ 2324 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2325 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2326 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2327 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2328 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2329 2330 redund->nzlocal = nzlocal; 2331 redund->nsends = nsends; 2332 redund->nrecvs = nrecvs; 2333 redund->send_rank = send_rank; 2334 redund->sbuf_nz = sbuf_nz; 2335 redund->sbuf_j = sbuf_j; 2336 redund->sbuf_a = sbuf_a; 2337 redund->rbuf_j = 
rbuf_j;
 2338 redund->rbuf_a = rbuf_a;
 2339
 2340 redund->MatDestroy = C->ops->destroy;
 2341 C->ops->destroy = MatDestroy_MatRedundant;
 2342 }
 2343 PetscFunctionReturn(0);
 2344 }
 2345
 2346 #undef __FUNCT__
 2347 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
 2348 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
 2349 {
 2350 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
 2351 PetscErrorCode ierr;
 2352 PetscInt i,*idxb = 0;
 2353 PetscScalar *va,*vb;
 2354 Vec vtmp;
 2355
 2356 PetscFunctionBegin;
 2357 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
 2358 ierr = VecGetArray(v,&va);CHKERRQ(ierr);
 2359 if (idx) {
 2360 for (i=0; i<A->rmap->n; i++) {
 2361 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
 2362 }
 2363 }
 2364
 2365 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
 2366 if (idx) {
 2367 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
 2368 }
 2369 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
 2370 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
 2371
 2372 for (i=0; i<A->rmap->n; i++){
 2373 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
 2374 va[i] = vb[i];
 2375 if (idx) idx[i] = a->garray[idxb[i]];
 2376 }
 2377 }
 2378
 2379 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
 2380 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
 2381 if (idxb) {
 2382 ierr = PetscFree(idxb);CHKERRQ(ierr);
 2383 }
 2384 ierr = VecDestroy(vtmp);CHKERRQ(ierr);
 2385 PetscFunctionReturn(0);
 2386 }
 2387
 2388 #undef __FUNCT__
 2389 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
 2390 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
 2391 {
 2392 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
 2393 PetscErrorCode ierr;
 2394 PetscInt i,*idxb = 0;
 2395 PetscScalar *va,*vb;
 2396 Vec vtmp;
 2397
 2398 PetscFunctionBegin;
 2399 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
 2400 ierr = VecGetArray(v,&va);CHKERRQ(ierr);
 2401 if (idx) {
 2402 for (i=0; i<A->rmap->n; i++) {
 2403 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
 2404 }
 2405 }
 2406
 2407 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
 2408 if (idx) {
 2409 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
 2410 }
 2411 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
 2412 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
 2413
 2414 for (i=0; i<A->rmap->n; i++){
 2415 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
 2416 va[i] = vb[i];
 2417 if (idx) idx[i] = a->garray[idxb[i]];
 2418 }
 2419 }
 2420
 2421 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
 2422 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
 2423 if (idxb) {
 2424 ierr = PetscFree(idxb);CHKERRQ(ierr);
 2425 }
 2426 ierr = VecDestroy(vtmp);CHKERRQ(ierr);
 2427 PetscFunctionReturn(0);
 2428 }
 2429
 2430 #undef __FUNCT__
 2431 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
 2432 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
 2433 {
 2434 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data;
 2435 PetscInt n = A->rmap->n;
 2436 PetscInt cstart = A->cmap->rstart;
 2437 PetscInt *cmap = mat->garray;
 2438 PetscInt *diagIdx, *offdiagIdx;
 2439 Vec diagV, offdiagV;
 2440 PetscScalar *a, *diagA, *offdiagA;
 2441 PetscInt r;
 2442 PetscErrorCode ierr;
 2443
 2444 PetscFunctionBegin;
 2445 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
 2446 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
 2447 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
 2448 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
 2449 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
 2450
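  /* merge the per-row results: for each local row keep whichever candidate has the smaller
     magnitude, shifting diagonal-block column indices by cstart and mapping off-diagonal
     indices through garray back to global column numbers */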
ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2451 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2452 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2453 for(r = 0; r < n; ++r) { 2454 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2455 a[r] = diagA[r]; 2456 idx[r] = cstart + diagIdx[r]; 2457 } else { 2458 a[r] = offdiagA[r]; 2459 idx[r] = cmap[offdiagIdx[r]]; 2460 } 2461 } 2462 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2463 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2464 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2465 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2466 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2467 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2468 PetscFunctionReturn(0); 2469 } 2470 2471 #undef __FUNCT__ 2472 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2473 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2474 { 2475 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2476 PetscInt n = A->rmap->n; 2477 PetscInt cstart = A->cmap->rstart; 2478 PetscInt *cmap = mat->garray; 2479 PetscInt *diagIdx, *offdiagIdx; 2480 Vec diagV, offdiagV; 2481 PetscScalar *a, *diagA, *offdiagA; 2482 PetscInt r; 2483 PetscErrorCode ierr; 2484 2485 PetscFunctionBegin; 2486 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2487 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2488 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2489 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2490 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2491 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2492 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2493 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2494 for(r = 0; r < n; ++r) { 2495 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2496 a[r] = diagA[r]; 2497 idx[r] = cstart + diagIdx[r]; 2498 } else { 2499 a[r] = offdiagA[r]; 2500 idx[r] = cmap[offdiagIdx[r]]; 2501 } 2502 } 2503 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2504 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2505 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2506 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2507 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2508 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2509 PetscFunctionReturn(0); 2510 } 2511 2512 #undef __FUNCT__ 2513 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2514 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[]) 2515 { 2516 PetscErrorCode ierr; 2517 2518 PetscFunctionBegin; 2519 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 2520 PetscFunctionReturn(0); 2521 } 2522 2523 /* -------------------------------------------------------------------*/ 2524 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2525 MatGetRow_MPIAIJ, 2526 MatRestoreRow_MPIAIJ, 2527 MatMult_MPIAIJ, 2528 /* 4*/ MatMultAdd_MPIAIJ, 2529 MatMultTranspose_MPIAIJ, 2530 MatMultTransposeAdd_MPIAIJ, 2531 #ifdef PETSC_HAVE_PBGL 2532 MatSolve_MPIAIJ, 2533 #else 2534 0, 2535 #endif 2536 0, 2537 0, 2538 /*10*/ 0, 2539 0, 2540 0, 2541 MatRelax_MPIAIJ, 2542 MatTranspose_MPIAIJ, 2543 /*15*/ MatGetInfo_MPIAIJ, 2544 MatEqual_MPIAIJ, 2545 MatGetDiagonal_MPIAIJ, 2546 MatDiagonalScale_MPIAIJ, 2547 MatNorm_MPIAIJ, 2548 /*20*/ MatAssemblyBegin_MPIAIJ, 2549 MatAssemblyEnd_MPIAIJ, 2550 0, 2551 MatSetOption_MPIAIJ, 2552 MatZeroEntries_MPIAIJ, 2553 /*25*/ MatZeroRows_MPIAIJ, 2554 0, 2555 #ifdef PETSC_HAVE_PBGL 
2556 0, 2557 #else 2558 0, 2559 #endif 2560 0, 2561 0, 2562 /*30*/ MatSetUpPreallocation_MPIAIJ, 2563 #ifdef PETSC_HAVE_PBGL 2564 0, 2565 #else 2566 0, 2567 #endif 2568 0, 2569 0, 2570 0, 2571 /*35*/ MatDuplicate_MPIAIJ, 2572 0, 2573 0, 2574 0, 2575 0, 2576 /*40*/ MatAXPY_MPIAIJ, 2577 MatGetSubMatrices_MPIAIJ, 2578 MatIncreaseOverlap_MPIAIJ, 2579 MatGetValues_MPIAIJ, 2580 MatCopy_MPIAIJ, 2581 /*45*/ MatGetRowMax_MPIAIJ, 2582 MatScale_MPIAIJ, 2583 0, 2584 0, 2585 0, 2586 /*50*/ MatSetBlockSize_MPIAIJ, 2587 0, 2588 0, 2589 0, 2590 0, 2591 /*55*/ MatFDColoringCreate_MPIAIJ, 2592 0, 2593 MatSetUnfactored_MPIAIJ, 2594 MatPermute_MPIAIJ, 2595 0, 2596 /*60*/ MatGetSubMatrix_MPIAIJ, 2597 MatDestroy_MPIAIJ, 2598 MatView_MPIAIJ, 2599 0, 2600 0, 2601 /*65*/ 0, 2602 0, 2603 0, 2604 0, 2605 0, 2606 /*70*/ MatGetRowMaxAbs_MPIAIJ, 2607 MatGetRowMinAbs_MPIAIJ, 2608 0, 2609 MatSetColoring_MPIAIJ, 2610 #if defined(PETSC_HAVE_ADIC) 2611 MatSetValuesAdic_MPIAIJ, 2612 #else 2613 0, 2614 #endif 2615 MatSetValuesAdifor_MPIAIJ, 2616 /*75*/ 0, 2617 0, 2618 0, 2619 0, 2620 0, 2621 /*80*/ 0, 2622 0, 2623 0, 2624 /*84*/ MatLoad_MPIAIJ, 2625 0, 2626 0, 2627 0, 2628 0, 2629 0, 2630 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 2631 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2632 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2633 MatPtAP_Basic, 2634 MatPtAPSymbolic_MPIAIJ, 2635 /*95*/ MatPtAPNumeric_MPIAIJ, 2636 0, 2637 0, 2638 0, 2639 0, 2640 /*100*/0, 2641 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2642 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2643 MatConjugate_MPIAIJ, 2644 0, 2645 /*105*/MatSetValuesRow_MPIAIJ, 2646 MatRealPart_MPIAIJ, 2647 MatImaginaryPart_MPIAIJ, 2648 0, 2649 0, 2650 /*110*/0, 2651 MatGetRedundantMatrix_MPIAIJ, 2652 MatGetRowMin_MPIAIJ, 2653 0, 2654 0, 2655 /*115*/MatGetSeqNonzerostructure_MPIAIJ}; 2656 2657 /* ----------------------------------------------------------------------------------------*/ 2658 2659 EXTERN_C_BEGIN 2660 #undef __FUNCT__ 2661 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2662 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2663 { 2664 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2665 PetscErrorCode ierr; 2666 2667 PetscFunctionBegin; 2668 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2669 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2670 PetscFunctionReturn(0); 2671 } 2672 EXTERN_C_END 2673 2674 EXTERN_C_BEGIN 2675 #undef __FUNCT__ 2676 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2677 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2678 { 2679 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2680 PetscErrorCode ierr; 2681 2682 PetscFunctionBegin; 2683 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2684 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2685 PetscFunctionReturn(0); 2686 } 2687 EXTERN_C_END 2688 2689 #include "petscpc.h" 2690 EXTERN_C_BEGIN 2691 #undef __FUNCT__ 2692 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2693 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2694 { 2695 Mat_MPIAIJ *b; 2696 PetscErrorCode ierr; 2697 PetscInt i; 2698 2699 PetscFunctionBegin; 2700 B->preallocated = PETSC_TRUE; 2701 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2702 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2703 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2704 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2705 2706 ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr); 2707 
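  /* AIJ is a point (block size 1) format; both layouts get unit block size and are set up
     here so that the ownership ranges exist before the per-row counts are checked against
     B->rmap->n and before the sequential diagonal/off-diagonal blocks are created below */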
ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr); 2708 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 2709 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 2710 if (d_nnz) { 2711 for (i=0; i<B->rmap->n; i++) { 2712 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2713 } 2714 } 2715 if (o_nnz) { 2716 for (i=0; i<B->rmap->n; i++) { 2717 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2718 } 2719 } 2720 b = (Mat_MPIAIJ*)B->data; 2721 2722 /* Explicitly create 2 MATSEQAIJ matrices. */ 2723 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2724 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2725 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2726 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2727 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2728 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2729 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2730 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2731 2732 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2733 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2734 2735 PetscFunctionReturn(0); 2736 } 2737 EXTERN_C_END 2738 2739 #undef __FUNCT__ 2740 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2741 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2742 { 2743 Mat mat; 2744 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2745 PetscErrorCode ierr; 2746 2747 PetscFunctionBegin; 2748 *newmat = 0; 2749 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2750 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2751 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2752 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2753 a = (Mat_MPIAIJ*)mat->data; 2754 2755 mat->factor = matin->factor; 2756 mat->rmap->bs = matin->rmap->bs; 2757 mat->assembled = PETSC_TRUE; 2758 mat->insertmode = NOT_SET_VALUES; 2759 mat->preallocated = PETSC_TRUE; 2760 2761 a->size = oldmat->size; 2762 a->rank = oldmat->rank; 2763 a->donotstash = oldmat->donotstash; 2764 a->roworiented = oldmat->roworiented; 2765 a->rowindices = 0; 2766 a->rowvalues = 0; 2767 a->getrowactive = PETSC_FALSE; 2768 2769 ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr); 2770 ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr); 2771 2772 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr); 2773 if (oldmat->colmap) { 2774 #if defined (PETSC_USE_CTABLE) 2775 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2776 #else 2777 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2778 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2779 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2780 #endif 2781 } else a->colmap = 0; 2782 if (oldmat->garray) { 2783 PetscInt len; 2784 len = oldmat->B->cmap->n; 2785 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2786 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2787 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2788 } else a->garray = 0; 2789 2790 ierr = 
VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2791 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2792 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2793 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2794 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2795 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2796 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2797 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2798 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2799 *newmat = mat; 2800 PetscFunctionReturn(0); 2801 } 2802 2803 #include "petscsys.h" 2804 2805 #undef __FUNCT__ 2806 #define __FUNCT__ "MatLoad_MPIAIJ" 2807 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 2808 { 2809 Mat A; 2810 PetscScalar *vals,*svals; 2811 MPI_Comm comm = ((PetscObject)viewer)->comm; 2812 MPI_Status status; 2813 PetscErrorCode ierr; 2814 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 2815 PetscInt i,nz,j,rstart,rend,mmax; 2816 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2817 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2818 PetscInt cend,cstart,n,*rowners; 2819 int fd; 2820 2821 PetscFunctionBegin; 2822 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2823 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2824 if (!rank) { 2825 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2826 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2827 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2828 } 2829 2830 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2831 M = header[1]; N = header[2]; 2832 /* determine ownership of all rows */ 2833 m = M/size + ((M % size) > rank); 2834 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2835 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2836 2837 /* First process needs enough room for process with most rows */ 2838 if (!rank) { 2839 mmax = rowners[1]; 2840 for (i=2; i<size; i++) { 2841 mmax = PetscMax(mmax,rowners[i]); 2842 } 2843 } else mmax = m; 2844 2845 rowners[0] = 0; 2846 for (i=2; i<=size; i++) { 2847 rowners[i] += rowners[i-1]; 2848 } 2849 rstart = rowners[rank]; 2850 rend = rowners[rank+1]; 2851 2852 /* distribute row lengths to all processors */ 2853 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2854 if (!rank) { 2855 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2856 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2857 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2858 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2859 for (j=0; j<m; j++) { 2860 procsnz[0] += ourlens[j]; 2861 } 2862 for (i=1; i<size; i++) { 2863 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2864 /* calculate the number of nonzeros on each processor */ 2865 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2866 procsnz[i] += rowlengths[j]; 2867 } 2868 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2869 } 2870 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2871 } else { 2872 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2873 } 2874 2875 if (!rank) { 2876 /* determine max buffer needed and allocate it */ 2877 maxnz = 0; 2878 for (i=0; i<size; 
i++) { 2879 maxnz = PetscMax(maxnz,procsnz[i]); 2880 } 2881 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2882 2883 /* read in my part of the matrix column indices */ 2884 nz = procsnz[0]; 2885 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2886 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2887 2888 /* read in every one elses and ship off */ 2889 for (i=1; i<size; i++) { 2890 nz = procsnz[i]; 2891 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2892 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2893 } 2894 ierr = PetscFree(cols);CHKERRQ(ierr); 2895 } else { 2896 /* determine buffer space needed for message */ 2897 nz = 0; 2898 for (i=0; i<m; i++) { 2899 nz += ourlens[i]; 2900 } 2901 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2902 2903 /* receive message of column indices*/ 2904 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2905 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2906 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2907 } 2908 2909 /* determine column ownership if matrix is not square */ 2910 if (N != M) { 2911 n = N/size + ((N % size) > rank); 2912 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2913 cstart = cend - n; 2914 } else { 2915 cstart = rstart; 2916 cend = rend; 2917 n = cend - cstart; 2918 } 2919 2920 /* loop over local rows, determining number of off diagonal entries */ 2921 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2922 jj = 0; 2923 for (i=0; i<m; i++) { 2924 for (j=0; j<ourlens[i]; j++) { 2925 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2926 jj++; 2927 } 2928 } 2929 2930 /* create our matrix */ 2931 for (i=0; i<m; i++) { 2932 ourlens[i] -= offlens[i]; 2933 } 2934 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2935 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2936 ierr = MatSetType(A,type);CHKERRQ(ierr); 2937 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2938 2939 for (i=0; i<m; i++) { 2940 ourlens[i] += offlens[i]; 2941 } 2942 2943 if (!rank) { 2944 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2945 2946 /* read in my part of the matrix numerical values */ 2947 nz = procsnz[0]; 2948 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2949 2950 /* insert into matrix */ 2951 jj = rstart; 2952 smycols = mycols; 2953 svals = vals; 2954 for (i=0; i<m; i++) { 2955 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2956 smycols += ourlens[i]; 2957 svals += ourlens[i]; 2958 jj++; 2959 } 2960 2961 /* read in other processors and ship out */ 2962 for (i=1; i<size; i++) { 2963 nz = procsnz[i]; 2964 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2965 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 2966 } 2967 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2968 } else { 2969 /* receive numeric values */ 2970 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2971 2972 /* receive message of values*/ 2973 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 2974 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2975 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2976 2977 /* insert into matrix */ 2978 jj = rstart; 2979 smycols = mycols; 2980 svals = vals; 2981 for (i=0; i<m; i++) { 2982 ierr = 
MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2983 smycols += ourlens[i]; 2984 svals += ourlens[i]; 2985 jj++; 2986 } 2987 } 2988 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2989 ierr = PetscFree(vals);CHKERRQ(ierr); 2990 ierr = PetscFree(mycols);CHKERRQ(ierr); 2991 ierr = PetscFree(rowners);CHKERRQ(ierr); 2992 2993 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2994 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2995 *newmat = A; 2996 PetscFunctionReturn(0); 2997 } 2998 2999 #undef __FUNCT__ 3000 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3001 /* 3002 Not great since it makes two copies of the submatrix, first an SeqAIJ 3003 in local and then by concatenating the local matrices the end result. 3004 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3005 */ 3006 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3007 { 3008 PetscErrorCode ierr; 3009 PetscMPIInt rank,size; 3010 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3011 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3012 Mat *local,M,Mreuse; 3013 MatScalar *vwork,*aa; 3014 MPI_Comm comm = ((PetscObject)mat)->comm; 3015 Mat_SeqAIJ *aij; 3016 3017 3018 PetscFunctionBegin; 3019 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3020 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3021 3022 if (call == MAT_REUSE_MATRIX) { 3023 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3024 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3025 local = &Mreuse; 3026 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3027 } else { 3028 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3029 Mreuse = *local; 3030 ierr = PetscFree(local);CHKERRQ(ierr); 3031 } 3032 3033 /* 3034 m - number of local rows 3035 n - number of columns (same on all processors) 3036 rstart - first row in new global matrix generated 3037 */ 3038 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3039 if (call == MAT_INITIAL_MATRIX) { 3040 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3041 ii = aij->i; 3042 jj = aij->j; 3043 3044 /* 3045 Determine the number of non-zeros in the diagonal and off-diagonal 3046 portions of the matrix in order to do correct preallocation 3047 */ 3048 3049 /* first get start and end of "diagonal" columns */ 3050 if (csize == PETSC_DECIDE) { 3051 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3052 if (mglobal == n) { /* square matrix */ 3053 nlocal = m; 3054 } else { 3055 nlocal = n/size + ((n % size) > rank); 3056 } 3057 } else { 3058 nlocal = csize; 3059 } 3060 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3061 rstart = rend - nlocal; 3062 if (rank == size - 1 && rend != n) { 3063 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3064 } 3065 3066 /* next, compute all the lengths */ 3067 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3068 olens = dlens + m; 3069 for (i=0; i<m; i++) { 3070 jend = ii[i+1] - ii[i]; 3071 olen = 0; 3072 dlen = 0; 3073 for (j=0; j<jend; j++) { 3074 if (*jj < rstart || *jj >= rend) olen++; 3075 else dlen++; 3076 jj++; 3077 } 3078 olens[i] = olen; 3079 dlens[i] = dlen; 3080 } 3081 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3082 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3083 ierr = 
MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
 3084 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
 3085 ierr = PetscFree(dlens);CHKERRQ(ierr);
 3086 } else {
 3087 PetscInt ml,nl;
 3088
 3089 M = *newmat;
 3090 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
 3091 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
 3092 ierr = MatZeroEntries(M);CHKERRQ(ierr);
 3093 /*
 3094 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
 3095 rather than the slower MatSetValues().
 3096 */
 3097 M->was_assembled = PETSC_TRUE;
 3098 M->assembled = PETSC_FALSE;
 3099 }
 3100 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
 3101 aij = (Mat_SeqAIJ*)(Mreuse)->data;
 3102 ii = aij->i;
 3103 jj = aij->j;
 3104 aa = aij->a;
 3105 for (i=0; i<m; i++) {
 3106 row = rstart + i;
 3107 nz = ii[i+1] - ii[i];
 3108 cwork = jj; jj += nz;
 3109 vwork = aa; aa += nz;
 3110 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
 3111 }
 3112
 3113 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
 3114 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
 3115 *newmat = M;
 3116
 3117 /* save submatrix used in processor for next request */
 3118 if (call == MAT_INITIAL_MATRIX) {
 3119 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
 3120 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
 3121 }
 3122
 3123 PetscFunctionReturn(0);
 3124 }
 3125
 3126 EXTERN_C_BEGIN
 3127 #undef __FUNCT__
 3128 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
 3129 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
 3130 {
 3131 PetscInt m,cstart, cend,j,nnz,i,d;
 3132 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
 3133 const PetscInt *JJ;
 3134 PetscScalar *values;
 3135 PetscErrorCode ierr;
 3136
 3137 PetscFunctionBegin;
 3138 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
 3139
 3140 ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr);
 3141 ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr);
 3142 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
 3143 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
 3144 m = B->rmap->n;
 3145 cstart = B->cmap->rstart;
 3146 cend = B->cmap->rend;
 3147 rstart = B->rmap->rstart;
 3148
 3149 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
 3150 o_nnz = d_nnz + m;
 3151
 3152 #if defined(PETSC_USE_DEBUG)
 3153 for (i=0; i<m; i++) {
 3154 nnz = Ii[i+1]- Ii[i];
 3155 JJ = J + Ii[i];
 3156 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
 3157 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
 3158 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
 3159 for (j=1; j<nnz; j++) {
 3160 if (JJ[j] <= JJ[j-1]) SETERRQ2(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at %D location in column indices",i,j);
 3161 }
 3162 }
 3163 #endif
 3164
 3165 for (i=0; i<m; i++) {
 3166 nnz = Ii[i+1]- Ii[i];
 3167 JJ = J + Ii[i];
 3168 nnz_max = PetscMax(nnz_max,nnz);
 3169 for (j=0; j<nnz; j++) {
 3170 if (*JJ >= cstart) break;
 3171 JJ++;
 3172 }
 3173 d = 0;
 3174 for (; j<nnz; j++) {
 3175 if (*JJ++ >= cend) break;
 3176 d++;
 3177 }
 3178 d_nnz[i] = d;
 3179 o_nnz[i] = nnz - d;
 3180 }
 3181 ierr =
MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3182 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 3183 3184 if (v) values = (PetscScalar*)v; 3185 else { 3186 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3187 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3188 } 3189 3190 for (i=0; i<m; i++) { 3191 ii = i + rstart; 3192 nnz = Ii[i+1]- Ii[i]; 3193 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3194 } 3195 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3196 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3197 3198 if (!v) { 3199 ierr = PetscFree(values);CHKERRQ(ierr); 3200 } 3201 PetscFunctionReturn(0); 3202 } 3203 EXTERN_C_END 3204 3205 #undef __FUNCT__ 3206 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3207 /*@ 3208 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3209 (the default parallel PETSc format). 3210 3211 Collective on MPI_Comm 3212 3213 Input Parameters: 3214 + B - the matrix 3215 . i - the indices into j for the start of each local row (starts with zero) 3216 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3217 - v - optional values in the matrix 3218 3219 Level: developer 3220 3221 Notes: 3222 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3223 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3224 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3225 3226 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3227 3228 The format which is used for the sparse matrix input, is equivalent to a 3229 row-major ordering.. i.e for the following matrix, the input data expected is 3230 as shown: 3231 3232 1 0 0 3233 2 0 3 P0 3234 ------- 3235 4 5 6 P1 3236 3237 Process0 [P0]: rows_owned=[0,1] 3238 i = {0,1,3} [size = nrow+1 = 2+1] 3239 j = {0,0,2} [size = nz = 6] 3240 v = {1,2,3} [size = nz = 6] 3241 3242 Process1 [P1]: rows_owned=[2] 3243 i = {0,3} [size = nrow+1 = 1+1] 3244 j = {0,1,2} [size = nz = 6] 3245 v = {4,5,6} [size = nz = 6] 3246 3247 The column indices for each row MUST be sorted. 3248 3249 .keywords: matrix, aij, compressed row, sparse, parallel 3250 3251 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3252 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3253 @*/ 3254 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3255 { 3256 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 3257 3258 PetscFunctionBegin; 3259 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3260 if (f) { 3261 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 3262 } 3263 PetscFunctionReturn(0); 3264 } 3265 3266 #undef __FUNCT__ 3267 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3268 /*@C 3269 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3270 (the default parallel PETSc format). For good matrix assembly performance 3271 the user should preallocate the matrix storage by setting the parameters 3272 d_nz (or d_nnz) and o_nz (or o_nnz). 
By setting these parameters accurately, 3273 performance can be increased by more than a factor of 50. 3274 3275 Collective on MPI_Comm 3276 3277 Input Parameters: 3278 + A - the matrix 3279 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3280 (same value is used for all local rows) 3281 . d_nnz - array containing the number of nonzeros in the various rows of the 3282 DIAGONAL portion of the local submatrix (possibly different for each row) 3283 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3284 The size of this array is equal to the number of local rows, i.e 'm'. 3285 You must leave room for the diagonal entry even if it is zero. 3286 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3287 submatrix (same value is used for all local rows). 3288 - o_nnz - array containing the number of nonzeros in the various rows of the 3289 OFF-DIAGONAL portion of the local submatrix (possibly different for 3290 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3291 structure. The size of this array is equal to the number 3292 of local rows, i.e 'm'. 3293 3294 If the *_nnz parameter is given then the *_nz parameter is ignored 3295 3296 The AIJ format (also called the Yale sparse matrix format or 3297 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3298 storage. The stored row and column indices begin with zero. See the users manual for details. 3299 3300 The parallel matrix is partitioned such that the first m0 rows belong to 3301 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3302 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3303 3304 The DIAGONAL portion of the local submatrix of a processor can be defined 3305 as the submatrix which is obtained by extraction the part corresponding 3306 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 3307 first row that belongs to the processor, and r2 is the last row belonging 3308 to the this processor. This is a square mxm matrix. The remaining portion 3309 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 3310 3311 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3312 3313 You can call MatGetInfo() to get information on how effective the preallocation was; 3314 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3315 You can also run with the option -info and look for messages with the string 3316 malloc in them to see if additional memory allocation was needed. 3317 3318 Example usage: 3319 3320 Consider the following 8x8 matrix with 34 non-zero values, that is 3321 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3322 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3323 as follows: 3324 3325 .vb 3326 1 2 0 | 0 3 0 | 0 4 3327 Proc0 0 5 6 | 7 0 0 | 8 0 3328 9 0 10 | 11 0 0 | 12 0 3329 ------------------------------------- 3330 13 0 14 | 15 16 17 | 0 0 3331 Proc1 0 18 0 | 19 20 21 | 0 0 3332 0 0 0 | 22 23 0 | 24 0 3333 ------------------------------------- 3334 Proc2 25 26 27 | 0 0 28 | 29 0 3335 30 0 0 | 31 32 33 | 0 34 3336 .ve 3337 3338 This can be represented as a collection of submatrices as: 3339 3340 .vb 3341 A B C 3342 D E F 3343 G H I 3344 .ve 3345 3346 Where the submatrices A,B,C are owned by proc0, D,E,F are 3347 owned by proc1, G,H,I are owned by proc2. 3348 3349 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3350 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 
3351 The 'M','N' parameters are 8,8, and have the same values on all procs. 3352 3353 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3354 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3355 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3356 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3357 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3358 matrix, ans [DF] as another SeqAIJ matrix. 3359 3360 When d_nz, o_nz parameters are specified, d_nz storage elements are 3361 allocated for every row of the local diagonal submatrix, and o_nz 3362 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3363 One way to choose d_nz and o_nz is to use the max nonzerors per local 3364 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3365 In this case, the values of d_nz,o_nz are: 3366 .vb 3367 proc0 : dnz = 2, o_nz = 2 3368 proc1 : dnz = 3, o_nz = 2 3369 proc2 : dnz = 1, o_nz = 4 3370 .ve 3371 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3372 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3373 for proc3. i.e we are using 12+15+10=37 storage locations to store 3374 34 values. 3375 3376 When d_nnz, o_nnz parameters are specified, the storage is specified 3377 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3378 In the above case the values for d_nnz,o_nnz are: 3379 .vb 3380 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3381 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3382 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3383 .ve 3384 Here the space allocated is sum of all the above values i.e 34, and 3385 hence pre-allocation is perfect. 3386 3387 Level: intermediate 3388 3389 .keywords: matrix, aij, compressed row, sparse, parallel 3390 3391 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3392 MPIAIJ, MatGetInfo() 3393 @*/ 3394 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3395 { 3396 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3397 3398 PetscFunctionBegin; 3399 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3400 if (f) { 3401 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3402 } 3403 PetscFunctionReturn(0); 3404 } 3405 3406 #undef __FUNCT__ 3407 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3408 /*@ 3409 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3410 CSR format the local rows. 3411 3412 Collective on MPI_Comm 3413 3414 Input Parameters: 3415 + comm - MPI communicator 3416 . m - number of local rows (Cannot be PETSC_DECIDE) 3417 . n - This value should be the same as the local size used in creating the 3418 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3419 calculated if N is given) For square matrices n is almost always m. 3420 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3421 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3422 . i - row indices 3423 . j - column indices 3424 - a - matrix values 3425 3426 Output Parameter: 3427 . 
mat - the matrix 3428 3429 Level: intermediate 3430 3431 Notes: 3432 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3433 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3434 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3435 3436 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3437 3438 The format which is used for the sparse matrix input, is equivalent to a 3439 row-major ordering.. i.e for the following matrix, the input data expected is 3440 as shown: 3441 3442 1 0 0 3443 2 0 3 P0 3444 ------- 3445 4 5 6 P1 3446 3447 Process0 [P0]: rows_owned=[0,1] 3448 i = {0,1,3} [size = nrow+1 = 2+1] 3449 j = {0,0,2} [size = nz = 6] 3450 v = {1,2,3} [size = nz = 6] 3451 3452 Process1 [P1]: rows_owned=[2] 3453 i = {0,3} [size = nrow+1 = 1+1] 3454 j = {0,1,2} [size = nz = 6] 3455 v = {4,5,6} [size = nz = 6] 3456 3457 .keywords: matrix, aij, compressed row, sparse, parallel 3458 3459 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3460 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3461 @*/ 3462 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3463 { 3464 PetscErrorCode ierr; 3465 3466 PetscFunctionBegin; 3467 if (i[0]) { 3468 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3469 } 3470 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3471 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3472 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3473 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3474 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3475 PetscFunctionReturn(0); 3476 } 3477 3478 #undef __FUNCT__ 3479 #define __FUNCT__ "MatCreateMPIAIJ" 3480 /*@C 3481 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3482 (the default parallel PETSc format). For good matrix assembly performance 3483 the user should preallocate the matrix storage by setting the parameters 3484 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3485 performance can be increased by more than a factor of 50. 3486 3487 Collective on MPI_Comm 3488 3489 Input Parameters: 3490 + comm - MPI communicator 3491 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3492 This value should be the same as the local size used in creating the 3493 y vector for the matrix-vector product y = Ax. 3494 . n - This value should be the same as the local size used in creating the 3495 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3496 calculated if N is given) For square matrices n is almost always m. 3497 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3498 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3499 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3500 (same value is used for all local rows) 3501 . d_nnz - array containing the number of nonzeros in the various rows of the 3502 DIAGONAL portion of the local submatrix (possibly different for each row) 3503 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 
3504 The size of this array is equal to the number of local rows, i.e 'm'. 3505 You must leave room for the diagonal entry even if it is zero. 3506 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3507 submatrix (same value is used for all local rows). 3508 - o_nnz - array containing the number of nonzeros in the various rows of the 3509 OFF-DIAGONAL portion of the local submatrix (possibly different for 3510 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3511 structure. The size of this array is equal to the number 3512 of local rows, i.e 'm'. 3513 3514 Output Parameter: 3515 . A - the matrix 3516 3517 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3518 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3519 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3520 3521 Notes: 3522 If the *_nnz parameter is given then the *_nz parameter is ignored 3523 3524 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3525 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3526 storage requirements for this matrix. 3527 3528 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3529 processor than it must be used on all processors that share the object for 3530 that argument. 3531 3532 The user MUST specify either the local or global matrix dimensions 3533 (possibly both). 3534 3535 The parallel matrix is partitioned across processors such that the 3536 first m0 rows belong to process 0, the next m1 rows belong to 3537 process 1, the next m2 rows belong to process 2 etc.. where 3538 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3539 values corresponding to [m x N] submatrix. 3540 3541 The columns are logically partitioned with the n0 columns belonging 3542 to 0th partition, the next n1 columns belonging to the next 3543 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 3544 3545 The DIAGONAL portion of the local submatrix on any given processor 3546 is the submatrix corresponding to the rows and columns m,n 3547 corresponding to the given processor. i.e diagonal matrix on 3548 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3549 etc. The remaining portion of the local submatrix [m x (N-n)] 3550 constitute the OFF-DIAGONAL portion. The example below better 3551 illustrates this concept. 3552 3553 For a square global matrix we define each processor's diagonal portion 3554 to be its local rows and the corresponding columns (a square submatrix); 3555 each processor's off-diagonal portion encompasses the remainder of the 3556 local matrix (a rectangular submatrix). 3557 3558 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3559 3560 When calling this routine with a single process communicator, a matrix of 3561 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3562 type of communicator, use the construction mechanism: 3563 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 3564 3565 By default, this format uses inodes (identical nodes) when possible. 3566 We search for consecutive rows with the same nonzero structure, thereby 3567 reusing matrix information to achieve increased efficiency. 3568 3569 Options Database Keys: 3570 + -mat_no_inode - Do not use inodes 3571 . 
-mat_inode_limit <limit> - Sets inode limit (max limit=5) 3572 - -mat_aij_oneindex - Internally use indexing starting at 1 3573 rather than 0. Note that when calling MatSetValues(), 3574 the user still MUST index entries starting at 0! 3575 3576 3577 Example usage: 3578 3579 Consider the following 8x8 matrix with 34 non-zero values, that is 3580 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3581 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3582 as follows: 3583 3584 .vb 3585 1 2 0 | 0 3 0 | 0 4 3586 Proc0 0 5 6 | 7 0 0 | 8 0 3587 9 0 10 | 11 0 0 | 12 0 3588 ------------------------------------- 3589 13 0 14 | 15 16 17 | 0 0 3590 Proc1 0 18 0 | 19 20 21 | 0 0 3591 0 0 0 | 22 23 0 | 24 0 3592 ------------------------------------- 3593 Proc2 25 26 27 | 0 0 28 | 29 0 3594 30 0 0 | 31 32 33 | 0 34 3595 .ve 3596 3597 This can be represented as a collection of submatrices as: 3598 3599 .vb 3600 A B C 3601 D E F 3602 G H I 3603 .ve 3604 3605 Where the submatrices A,B,C are owned by proc0, D,E,F are 3606 owned by proc1, G,H,I are owned by proc2. 3607 3608 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3609 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3610 The 'M','N' parameters are 8,8, and have the same values on all procs. 3611 3612 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3613 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3614 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3615 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3616 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3617 matrix, ans [DF] as another SeqAIJ matrix. 3618 3619 When d_nz, o_nz parameters are specified, d_nz storage elements are 3620 allocated for every row of the local diagonal submatrix, and o_nz 3621 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3622 One way to choose d_nz and o_nz is to use the max nonzerors per local 3623 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3624 In this case, the values of d_nz,o_nz are: 3625 .vb 3626 proc0 : dnz = 2, o_nz = 2 3627 proc1 : dnz = 3, o_nz = 2 3628 proc2 : dnz = 1, o_nz = 4 3629 .ve 3630 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3631 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3632 for proc3. i.e we are using 12+15+10=37 storage locations to store 3633 34 values. 3634 3635 When d_nnz, o_nnz parameters are specified, the storage is specified 3636 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3637 In the above case the values for d_nnz,o_nnz are: 3638 .vb 3639 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3640 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3641 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3642 .ve 3643 Here the space allocated is sum of all the above values i.e 34, and 3644 hence pre-allocation is perfect. 
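
   As a hedged illustration (not taken from an existing PETSc example; error checking
   omitted), the call made on proc1 for the 8x8 matrix above, using the d_nnz/o_nnz
   values just listed, would look roughly like:

.vb
   Mat      A;
   PetscInt d_nnz[3] = {3,3,2},o_nnz[3] = {2,1,1};  /* proc1: m=3 local rows, n=3 local columns */
   MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve

   Each process passes its own m,n,d_nnz,o_nnz; the d_nz/o_nz arguments (0 here) are
   ignored because the *_nnz arrays are provided.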
3645 3646 Level: intermediate 3647 3648 .keywords: matrix, aij, compressed row, sparse, parallel 3649 3650 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3651 MPIAIJ, MatCreateMPIAIJWithArrays() 3652 @*/ 3653 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3654 { 3655 PetscErrorCode ierr; 3656 PetscMPIInt size; 3657 3658 PetscFunctionBegin; 3659 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3660 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3661 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3662 if (size > 1) { 3663 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3664 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3665 } else { 3666 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3667 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3668 } 3669 PetscFunctionReturn(0); 3670 } 3671 3672 #undef __FUNCT__ 3673 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3674 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3675 { 3676 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3677 3678 PetscFunctionBegin; 3679 *Ad = a->A; 3680 *Ao = a->B; 3681 *colmap = a->garray; 3682 PetscFunctionReturn(0); 3683 } 3684 3685 #undef __FUNCT__ 3686 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3687 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3688 { 3689 PetscErrorCode ierr; 3690 PetscInt i; 3691 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3692 3693 PetscFunctionBegin; 3694 if (coloring->ctype == IS_COLORING_GLOBAL) { 3695 ISColoringValue *allcolors,*colors; 3696 ISColoring ocoloring; 3697 3698 /* set coloring for diagonal portion */ 3699 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3700 3701 /* set coloring for off-diagonal portion */ 3702 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3703 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3704 for (i=0; i<a->B->cmap->n; i++) { 3705 colors[i] = allcolors[a->garray[i]]; 3706 } 3707 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3708 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3709 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3710 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3711 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3712 ISColoringValue *colors; 3713 PetscInt *larray; 3714 ISColoring ocoloring; 3715 3716 /* set coloring for diagonal portion */ 3717 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3718 for (i=0; i<a->A->cmap->n; i++) { 3719 larray[i] = i + A->cmap->rstart; 3720 } 3721 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3722 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3723 for (i=0; i<a->A->cmap->n; i++) { 3724 colors[i] = coloring->colors[larray[i]]; 3725 } 3726 ierr = PetscFree(larray);CHKERRQ(ierr); 3727 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3728 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3729 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3730 3731 /* set coloring for off-diagonal portion */ 3732 ierr = 
PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype);
  }

  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ADIC)
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge"
/*@
      MatMerge - Creates a single large PETSc matrix by concatenating sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    inmat - the input sequential matrix (each process passes its own)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    outmat - the parallel matrix generated

    Level: advanced

   Notes: The number of columns of the matrix in EACH processor MUST be the same.
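
   As a rough usage sketch only (mlocal, N, and maxnz are illustrative placeholders, not
   part of the interface; error checking omitted), each process builds its own sequential
   block of rows and then calls MatMerge() collectively:

.vb
   Mat Aloc,A;
   MatCreateSeqAIJ(PETSC_COMM_SELF,mlocal,N,maxnz,PETSC_NULL,&Aloc);
   /* fill Aloc with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() */
   MatMerge(PETSC_COMM_WORLD,Aloc,PETSC_DECIDE,MAT_INITIAL_MATRIX,&A);
.ve

   Note that, as implemented below, MatMerge() destroys inmat, so the caller should not
   destroy it again.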
3797 3798 @*/ 3799 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3800 { 3801 PetscErrorCode ierr; 3802 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3803 PetscInt *indx; 3804 PetscScalar *values; 3805 3806 PetscFunctionBegin; 3807 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3808 if (scall == MAT_INITIAL_MATRIX){ 3809 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3810 if (n == PETSC_DECIDE){ 3811 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3812 } 3813 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3814 rstart -= m; 3815 3816 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3817 for (i=0;i<m;i++) { 3818 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3819 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3820 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3821 } 3822 /* This routine will ONLY return MPIAIJ type matrix */ 3823 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3824 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3825 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3826 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3827 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3828 3829 } else if (scall == MAT_REUSE_MATRIX){ 3830 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3831 } else { 3832 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3833 } 3834 3835 for (i=0;i<m;i++) { 3836 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3837 Ii = i + rstart; 3838 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3839 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3840 } 3841 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3842 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3843 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3844 3845 PetscFunctionReturn(0); 3846 } 3847 3848 #undef __FUNCT__ 3849 #define __FUNCT__ "MatFileSplit" 3850 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3851 { 3852 PetscErrorCode ierr; 3853 PetscMPIInt rank; 3854 PetscInt m,N,i,rstart,nnz; 3855 size_t len; 3856 const PetscInt *indx; 3857 PetscViewer out; 3858 char *name; 3859 Mat B; 3860 const PetscScalar *values; 3861 3862 PetscFunctionBegin; 3863 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3864 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3865 /* Should this be the type of the diagonal block of A? 
*/ 3866 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3867 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3868 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3869 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3870 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3871 for (i=0;i<m;i++) { 3872 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3873 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3874 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3875 } 3876 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3877 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3878 3879 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 3880 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3881 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3882 sprintf(name,"%s.%d",outfile,rank); 3883 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3884 ierr = PetscFree(name); 3885 ierr = MatView(B,out);CHKERRQ(ierr); 3886 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3887 ierr = MatDestroy(B);CHKERRQ(ierr); 3888 PetscFunctionReturn(0); 3889 } 3890 3891 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3892 #undef __FUNCT__ 3893 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3894 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3895 { 3896 PetscErrorCode ierr; 3897 Mat_Merge_SeqsToMPI *merge; 3898 PetscContainer container; 3899 3900 PetscFunctionBegin; 3901 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3902 if (container) { 3903 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3904 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3905 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3906 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3907 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3908 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3909 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3910 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3911 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3912 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3913 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3914 ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr); 3915 3916 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 3917 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3918 } 3919 ierr = PetscFree(merge);CHKERRQ(ierr); 3920 3921 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3922 PetscFunctionReturn(0); 3923 } 3924 3925 #include "../src/mat/utils/freespace.h" 3926 #include "petscbt.h" 3927 3928 #undef __FUNCT__ 3929 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 3930 /*@C 3931 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 3932 matrices from each processor 3933 3934 Collective on MPI_Comm 3935 3936 Input Parameters: 3937 + comm - the communicators the parallel matrix will live on 3938 . seqmat - the input sequential matrices 3939 . m - number of local rows (or PETSC_DECIDE) 3940 . n - number of local columns (or PETSC_DECIDE) 3941 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3942 3943 Output Parameter: 3944 . mpimat - the parallel matrix generated 3945 3946 Level: advanced 3947 3948 Notes: 3949 The dimensions of the sequential matrix in each processor MUST be the same. 3950 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 3951 destroyed when mpimat is destroyed. 
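
   As a hedged sketch (the key string below is the one composed onto mpimat in
   MatMerge_SeqsToMPISymbolic(); error checking omitted), the attached container can be
   retrieved with:

.vb
   PetscContainer container;
   PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);
.ve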
Call PetscObjectQuery() to access seqmat. 3952 @*/ 3953 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 3954 { 3955 PetscErrorCode ierr; 3956 MPI_Comm comm=((PetscObject)mpimat)->comm; 3957 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3958 PetscMPIInt size,rank,taga,*len_s; 3959 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 3960 PetscInt proc,m; 3961 PetscInt **buf_ri,**buf_rj; 3962 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3963 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3964 MPI_Request *s_waits,*r_waits; 3965 MPI_Status *status; 3966 MatScalar *aa=a->a; 3967 MatScalar **abuf_r,*ba_i; 3968 Mat_Merge_SeqsToMPI *merge; 3969 PetscContainer container; 3970 3971 PetscFunctionBegin; 3972 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3973 3974 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3975 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3976 3977 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3978 if (container) { 3979 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3980 } 3981 bi = merge->bi; 3982 bj = merge->bj; 3983 buf_ri = merge->buf_ri; 3984 buf_rj = merge->buf_rj; 3985 3986 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3987 owners = merge->rowmap.range; 3988 len_s = merge->len_s; 3989 3990 /* send and recv matrix values */ 3991 /*-----------------------------*/ 3992 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3993 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3994 3995 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 3996 for (proc=0,k=0; proc<size; proc++){ 3997 if (!len_s[proc]) continue; 3998 i = owners[proc]; 3999 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4000 k++; 4001 } 4002 4003 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4004 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4005 ierr = PetscFree(status);CHKERRQ(ierr); 4006 4007 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4008 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4009 4010 /* insert mat values of mpimat */ 4011 /*----------------------------*/ 4012 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4013 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 4014 nextrow = buf_ri_k + merge->nrecv; 4015 nextai = nextrow + merge->nrecv; 4016 4017 for (k=0; k<merge->nrecv; k++){ 4018 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4019 nrows = *(buf_ri_k[k]); 4020 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4021 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4022 } 4023 4024 /* set values of ba */ 4025 m = merge->rowmap.n; 4026 for (i=0; i<m; i++) { 4027 arow = owners[rank] + i; 4028 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4029 bnzi = bi[i+1] - bi[i]; 4030 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4031 4032 /* add local non-zero vals of this proc's seqmat into ba */ 4033 anzi = ai[arow+1] - ai[arow]; 4034 aj = a->j + ai[arow]; 4035 aa = a->a + ai[arow]; 4036 nextaj = 0; 4037 for (j=0; nextaj<anzi; j++){ 4038 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4039 ba_i[j] += aa[nextaj++]; 4040 } 4041 } 4042 4043 /* add 
received vals into ba */ 4044 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4045 /* i-th row */ 4046 if (i == *nextrow[k]) { 4047 anzi = *(nextai[k]+1) - *nextai[k]; 4048 aj = buf_rj[k] + *(nextai[k]); 4049 aa = abuf_r[k] + *(nextai[k]); 4050 nextaj = 0; 4051 for (j=0; nextaj<anzi; j++){ 4052 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4053 ba_i[j] += aa[nextaj++]; 4054 } 4055 } 4056 nextrow[k]++; nextai[k]++; 4057 } 4058 } 4059 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4060 } 4061 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4062 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4063 4064 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4065 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4066 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 4067 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4068 PetscFunctionReturn(0); 4069 } 4070 4071 #undef __FUNCT__ 4072 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4073 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4074 { 4075 PetscErrorCode ierr; 4076 Mat B_mpi; 4077 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4078 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4079 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4080 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4081 PetscInt len,proc,*dnz,*onz; 4082 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4083 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4084 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4085 MPI_Status *status; 4086 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4087 PetscBT lnkbt; 4088 Mat_Merge_SeqsToMPI *merge; 4089 PetscContainer container; 4090 4091 PetscFunctionBegin; 4092 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4093 4094 /* make sure it is a PETSc comm */ 4095 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4096 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4097 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4098 4099 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4100 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4101 4102 /* determine row ownership */ 4103 /*---------------------------------------------------------*/ 4104 ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 4105 merge->rowmap.n = m; 4106 merge->rowmap.N = M; 4107 merge->rowmap.bs = 1; 4108 ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr); 4109 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4110 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4111 4112 m = merge->rowmap.n; 4113 M = merge->rowmap.N; 4114 owners = merge->rowmap.range; 4115 4116 /* determine the number of messages to send, their lengths */ 4117 /*---------------------------------------------------------*/ 4118 len_s = merge->len_s; 4119 4120 len = 0; /* length of buf_si[] */ 4121 merge->nsend = 0; 4122 for (proc=0; proc<size; proc++){ 4123 len_si[proc] = 0; 4124 if (proc == rank){ 4125 len_s[proc] = 0; 4126 } else { 4127 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4128 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4129 } 4130 if (len_s[proc]) { 4131 merge->nsend++; 4132 nrows = 0; 4133 for (i=owners[proc]; i<owners[proc+1]; i++){ 4134 if (ai[i+1] > ai[i]) nrows++; 4135 } 4136 len_si[proc] = 2*(nrows+1); 4137 len += 
len_si[proc]; 4138 } 4139 } 4140 4141 /* determine the number and length of messages to receive for ij-structure */ 4142 /*-------------------------------------------------------------------------*/ 4143 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4144 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4145 4146 /* post the Irecv of j-structure */ 4147 /*-------------------------------*/ 4148 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4149 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4150 4151 /* post the Isend of j-structure */ 4152 /*--------------------------------*/ 4153 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 4154 sj_waits = si_waits + merge->nsend; 4155 4156 for (proc=0, k=0; proc<size; proc++){ 4157 if (!len_s[proc]) continue; 4158 i = owners[proc]; 4159 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4160 k++; 4161 } 4162 4163 /* receives and sends of j-structure are complete */ 4164 /*------------------------------------------------*/ 4165 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4166 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4167 4168 /* send and recv i-structure */ 4169 /*---------------------------*/ 4170 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4171 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4172 4173 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4174 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4175 for (proc=0,k=0; proc<size; proc++){ 4176 if (!len_s[proc]) continue; 4177 /* form outgoing message for i-structure: 4178 buf_si[0]: nrows to be sent 4179 [1:nrows]: row index (global) 4180 [nrows+1:2*nrows+1]: i-structure index 4181 */ 4182 /*-------------------------------------------*/ 4183 nrows = len_si[proc]/2 - 1; 4184 buf_si_i = buf_si + nrows+1; 4185 buf_si[0] = nrows; 4186 buf_si_i[0] = 0; 4187 nrows = 0; 4188 for (i=owners[proc]; i<owners[proc+1]; i++){ 4189 anzi = ai[i+1] - ai[i]; 4190 if (anzi) { 4191 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4192 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4193 nrows++; 4194 } 4195 } 4196 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4197 k++; 4198 buf_si += len_si[proc]; 4199 } 4200 4201 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4202 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4203 4204 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4205 for (i=0; i<merge->nrecv; i++){ 4206 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4207 } 4208 4209 ierr = PetscFree(len_si);CHKERRQ(ierr); 4210 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4211 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4212 ierr = PetscFree(si_waits);CHKERRQ(ierr); 4213 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4214 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4215 ierr = PetscFree(status);CHKERRQ(ierr); 4216 4217 /* compute a local seq matrix in each processor */ 4218 /*----------------------------------------------*/ 4219 /* allocate bi array and 
free space for accumulating nonzero column info */ 4220 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4221 bi[0] = 0; 4222 4223 /* create and initialize a linked list */ 4224 nlnk = N+1; 4225 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4226 4227 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4228 len = 0; 4229 len = ai[owners[rank+1]] - ai[owners[rank]]; 4230 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4231 current_space = free_space; 4232 4233 /* determine symbolic info for each local row */ 4234 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 4235 nextrow = buf_ri_k + merge->nrecv; 4236 nextai = nextrow + merge->nrecv; 4237 for (k=0; k<merge->nrecv; k++){ 4238 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4239 nrows = *buf_ri_k[k]; 4240 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4241 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4242 } 4243 4244 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4245 len = 0; 4246 for (i=0;i<m;i++) { 4247 bnzi = 0; 4248 /* add local non-zero cols of this proc's seqmat into lnk */ 4249 arow = owners[rank] + i; 4250 anzi = ai[arow+1] - ai[arow]; 4251 aj = a->j + ai[arow]; 4252 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4253 bnzi += nlnk; 4254 /* add received col data into lnk */ 4255 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4256 if (i == *nextrow[k]) { /* i-th row */ 4257 anzi = *(nextai[k]+1) - *nextai[k]; 4258 aj = buf_rj[k] + *nextai[k]; 4259 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4260 bnzi += nlnk; 4261 nextrow[k]++; nextai[k]++; 4262 } 4263 } 4264 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4265 4266 /* if free space is not available, make more free space */ 4267 if (current_space->local_remaining<bnzi) { 4268 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4269 nspacedouble++; 4270 } 4271 /* copy data into free space, then initialize lnk */ 4272 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4273 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4274 4275 current_space->array += bnzi; 4276 current_space->local_used += bnzi; 4277 current_space->local_remaining -= bnzi; 4278 4279 bi[i+1] = bi[i] + bnzi; 4280 } 4281 4282 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 4283 4284 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4285 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4286 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4287 4288 /* create symbolic parallel matrix B_mpi */ 4289 /*---------------------------------------*/ 4290 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4291 if (n==PETSC_DECIDE) { 4292 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4293 } else { 4294 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4295 } 4296 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4297 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4298 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4299 4300 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4301 B_mpi->assembled = PETSC_FALSE; 4302 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4303 merge->bi = bi; 4304 merge->bj = bj; 4305 merge->buf_ri = buf_ri; 4306 merge->buf_rj = 
buf_rj; 4307 merge->coi = PETSC_NULL; 4308 merge->coj = PETSC_NULL; 4309 merge->owners_co = PETSC_NULL; 4310 4311 /* attach the supporting struct to B_mpi for reuse */ 4312 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4313 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4314 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4315 *mpimat = B_mpi; 4316 4317 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4318 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4319 PetscFunctionReturn(0); 4320 } 4321 4322 #undef __FUNCT__ 4323 #define __FUNCT__ "MatMerge_SeqsToMPI" 4324 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4325 { 4326 PetscErrorCode ierr; 4327 4328 PetscFunctionBegin; 4329 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4330 if (scall == MAT_INITIAL_MATRIX){ 4331 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4332 } 4333 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4334 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4335 PetscFunctionReturn(0); 4336 } 4337 4338 #undef __FUNCT__ 4339 #define __FUNCT__ "MatGetLocalMat" 4340 /*@ 4341 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4342 4343 Not Collective 4344 4345 Input Parameters: 4346 + A - the matrix 4347 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4348 4349 Output Parameter: 4350 . A_loc - the local sequential matrix generated 4351 4352 Level: developer 4353 4354 @*/ 4355 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4356 { 4357 PetscErrorCode ierr; 4358 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4359 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4360 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4361 MatScalar *aa=a->a,*ba=b->a,*cam; 4362 PetscScalar *ca; 4363 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4364 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4365 4366 PetscFunctionBegin; 4367 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4368 if (scall == MAT_INITIAL_MATRIX){ 4369 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4370 ci[0] = 0; 4371 for (i=0; i<am; i++){ 4372 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4373 } 4374 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4375 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4376 k = 0; 4377 for (i=0; i<am; i++) { 4378 ncols_o = bi[i+1] - bi[i]; 4379 ncols_d = ai[i+1] - ai[i]; 4380 /* off-diagonal portion of A */ 4381 for (jo=0; jo<ncols_o; jo++) { 4382 col = cmap[*bj]; 4383 if (col >= cstart) break; 4384 cj[k] = col; bj++; 4385 ca[k++] = *ba++; 4386 } 4387 /* diagonal portion of A */ 4388 for (j=0; j<ncols_d; j++) { 4389 cj[k] = cstart + *aj++; 4390 ca[k++] = *aa++; 4391 } 4392 /* off-diagonal portion of A */ 4393 for (j=jo; j<ncols_o; j++) { 4394 cj[k] = cmap[*bj++]; 4395 ca[k++] = *ba++; 4396 } 4397 } 4398 /* put together the new matrix */ 4399 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4400 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4401 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 4402 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4403 mat->free_a = PETSC_TRUE; 4404 mat->free_ij = PETSC_TRUE; 4405 mat->nonew = 0; 4406 } else if (scall == MAT_REUSE_MATRIX){ 4407 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4408 ci = mat->i; cj = mat->j; cam = mat->a; 4409 for (i=0; i<am; i++) { 4410 /* off-diagonal portion of A */ 4411 ncols_o = bi[i+1] - bi[i]; 4412 for (jo=0; jo<ncols_o; jo++) { 4413 col = cmap[*bj]; 4414 if (col >= cstart) break; 4415 *cam++ = *ba++; bj++; 4416 } 4417 /* diagonal portion of A */ 4418 ncols_d = ai[i+1] - ai[i]; 4419 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4420 /* off-diagonal portion of A */ 4421 for (j=jo; j<ncols_o; j++) { 4422 *cam++ = *ba++; bj++; 4423 } 4424 } 4425 } else { 4426 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4427 } 4428 4429 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4430 PetscFunctionReturn(0); 4431 } 4432 4433 #undef __FUNCT__ 4434 #define __FUNCT__ "MatGetLocalMatCondensed" 4435 /*@C 4436 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4437 4438 Not Collective 4439 4440 Input Parameters: 4441 + A - the matrix 4442 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4443 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4444 4445 Output Parameter: 4446 . A_loc - the local sequential matrix generated 4447 4448 Level: developer 4449 4450 @*/ 4451 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4452 { 4453 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4454 PetscErrorCode ierr; 4455 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4456 IS isrowa,iscola; 4457 Mat *aloc; 4458 4459 PetscFunctionBegin; 4460 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4461 if (!row){ 4462 start = A->rmap->rstart; end = A->rmap->rend; 4463 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4464 } else { 4465 isrowa = *row; 4466 } 4467 if (!col){ 4468 start = A->cmap->rstart; 4469 cmap = a->garray; 4470 nzA = a->A->cmap->n; 4471 nzB = a->B->cmap->n; 4472 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4473 ncols = 0; 4474 for (i=0; i<nzB; i++) { 4475 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4476 else break; 4477 } 4478 imark = i; 4479 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4480 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4481 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4482 ierr = PetscFree(idx);CHKERRQ(ierr); 4483 } else { 4484 iscola = *col; 4485 } 4486 if (scall != MAT_INITIAL_MATRIX){ 4487 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4488 aloc[0] = *A_loc; 4489 } 4490 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4491 *A_loc = aloc[0]; 4492 ierr = PetscFree(aloc);CHKERRQ(ierr); 4493 if (!row){ 4494 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4495 } 4496 if (!col){ 4497 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4498 } 4499 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4500 PetscFunctionReturn(0); 4501 } 4502 4503 #undef __FUNCT__ 4504 #define __FUNCT__ "MatGetBrowsOfAcols" 4505 /*@C 4506 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4507 4508 Collective on Mat 4509 4510 Input Parameters: 4511 + A,B - the matrices in mpiaij format 4512 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4513 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4514 4515 Output Parameter: 4516 + rowb, colb - index sets of rows and columns of B to extract 4517 . brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows 4518 - B_seq - the sequential matrix generated 4519 4520 Level: developer 4521 4522 @*/ 4523 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4524 { 4525 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4526 PetscErrorCode ierr; 4527 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4528 IS isrowb,iscolb; 4529 Mat *bseq; 4530 4531 PetscFunctionBegin; 4532 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4533 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4534 } 4535 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4536 4537 if (scall == MAT_INITIAL_MATRIX){ 4538 start = A->cmap->rstart; 4539 cmap = a->garray; 4540 nzA = a->A->cmap->n; 4541 nzB = a->B->cmap->n; 4542 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4543 ncols = 0; 4544 for (i=0; i<nzB; i++) { /* row < local row index */ 4545 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4546 else break; 4547 } 4548 imark = i; 4549 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4550 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4551 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4552 ierr = PetscFree(idx);CHKERRQ(ierr); 4553 *brstart = imark; 4554 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4555 } else { 4556 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4557 isrowb = *rowb; iscolb = *colb; 4558 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4559 bseq[0] = *B_seq; 4560 } 4561 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4562 *B_seq = bseq[0]; 4563 ierr = PetscFree(bseq);CHKERRQ(ierr); 4564 if (!rowb){ 4565 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4566 } else { 4567 *rowb = isrowb; 4568 } 4569 if (!colb){ 4570 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4571 } else { 4572 *colb = iscolb; 4573 } 4574 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4575 PetscFunctionReturn(0); 4576 } 4577 4578 #undef __FUNCT__ 4579 #define __FUNCT__ "MatGetBrowsOfAoCols" 4580 /*@C 4581 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4582 of the OFF-DIAGONAL portion of local A 4583 4584 Collective on Mat 4585 4586 Input Parameters: 4587 + A,B - the matrices in mpiaij format 4588 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4589 . 
startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4590 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4591 4592 Output Parameter: 4593 + B_oth - the sequential matrix generated 4594 4595 Level: developer 4596 4597 @*/ 4598 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth) 4599 { 4600 VecScatter_MPI_General *gen_to,*gen_from; 4601 PetscErrorCode ierr; 4602 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4603 Mat_SeqAIJ *b_oth; 4604 VecScatter ctx=a->Mvctx; 4605 MPI_Comm comm=((PetscObject)ctx)->comm; 4606 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4607 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4608 PetscScalar *rvalues,*svalues; 4609 MatScalar *b_otha,*bufa,*bufA; 4610 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4611 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4612 MPI_Status *sstatus,rstatus; 4613 PetscMPIInt jj; 4614 PetscInt *cols,sbs,rbs; 4615 PetscScalar *vals; 4616 4617 PetscFunctionBegin; 4618 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4619 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4620 } 4621 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4622 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4623 4624 gen_to = (VecScatter_MPI_General*)ctx->todata; 4625 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4626 rvalues = gen_from->values; /* holds the length of receiving row */ 4627 svalues = gen_to->values; /* holds the length of sending row */ 4628 nrecvs = gen_from->n; 4629 nsends = gen_to->n; 4630 4631 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4632 srow = gen_to->indices; /* local row index to be sent */ 4633 sstarts = gen_to->starts; 4634 sprocs = gen_to->procs; 4635 sstatus = gen_to->sstatus; 4636 sbs = gen_to->bs; 4637 rstarts = gen_from->starts; 4638 rprocs = gen_from->procs; 4639 rbs = gen_from->bs; 4640 4641 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4642 if (scall == MAT_INITIAL_MATRIX){ 4643 /* i-array */ 4644 /*---------*/ 4645 /* post receives */ 4646 for (i=0; i<nrecvs; i++){ 4647 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4648 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4649 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4650 } 4651 4652 /* pack the outgoing message */ 4653 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 4654 rstartsj = sstartsj + nsends +1; 4655 sstartsj[0] = 0; rstartsj[0] = 0; 4656 len = 0; /* total length of j or a array to be sent */ 4657 k = 0; 4658 for (i=0; i<nsends; i++){ 4659 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4660 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4661 for (j=0; j<nrows; j++) { 4662 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4663 for (l=0; l<sbs; l++){ 4664 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4665 rowlen[j*sbs+l] = ncols; 4666 len += ncols; 4667 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4668 } 4669 k++; 4670 } 4671 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4672 
      sstartsj[i+1] = len;  /* starting point of the (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of the i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending the j and a arrays */
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);

    /* create the i-array of B_oth */
    ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
    b_othi[0] = 0;
    len = 0; /* total length of the j or a array to be received */
    k   = 0;
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of the (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for the j and a arrays of B_oth */
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of the j-array */
    for (i=0; i<nrecvs; i++){
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++){
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++){
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++){
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of the j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX){
    sstartsj = *startsj;
    rstartsj = sstartsj + nsends + 1;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Invalid MatReuse value; must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
  }

  /* a-array */
  /*---------*/
  /* post receives of the a-array */
  for (i=0; i<nrecvs; i++){
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++){
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++){
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++){
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of the a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj || !bufa_ptr){
      ierr = PetscFree(sstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
    } else {
      *startsj  = sstartsj;
      *bufa_ptr = bufa;
    }
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

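/*
   A minimal usage sketch of MatGetBrowsOfAoCols() (for developers; A, B, B_oth, startsj,
   and bufa below are hypothetical variables, not defined in this file):

       PetscInt  *startsj = PETSC_NULL;
       MatScalar *bufa    = PETSC_NULL;
       Mat       B_oth;

       ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
       /* ... later, after B's numerical values change but its nonzero pattern does not ... */
       ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);

   Passing PETSC_NULL for startsj or bufa_ptr forces the MAT_INITIAL_MATRIX path, since the
   saved communication offsets and send buffer are then unavailable for reuse.
*/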
#undef __FUNCT__
#define __FUNCT__ "MatGetCommunicationStructs"
/*@C
    MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

    Not Collective

   Input Parameter:
.   A - The matrix in mpiaij format

   Output Parameters:
+   lvec - The local vector holding off-process values from the argument to a matrix-vector product
.   colmap - A map from global column index to local index into lvec
-   multScatter - A scatter from the argument of a matrix-vector product to lvec

    Level: developer

@*/
#if defined (PETSC_USE_CTABLE)
PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ*) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}

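/*
   A minimal usage sketch of MatGetCommunicationStructs() (for developers; the matrix A and
   the local variables below are hypothetical, not defined in this file):

       Vec        lvec;
       VecScatter Mvctx;
   #if defined(PETSC_USE_CTABLE)
       PetscTable colmap;
   #else
       PetscInt   *colmap;
   #endif

       ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);

   The returned objects are the matrix's internal data structures, so the caller should not
   destroy them.
*/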
EXTERN_C_BEGIN
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
EXTERN_C_END

#include "../src/mat/impls/dense/mpi/mpidense.h"

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
/*
    Computes (B'*A')' since computing B*A directly is untenable

               n                       p                           p
        (              )      (               )         (                  )
      m (      A       )  *  n (       B      )   =   m (         C        )
        (              )      (               )         (                  )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(At);CHKERRQ(ierr);
  ierr = MatDestroy(Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
  ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C = Cmat;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

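/*
   A minimal usage sketch of the dense*AIJ product implemented above (the matrices A, B, and C
   below are hypothetical: A is MATMPIDENSE of size m x n, B is MATMPIAIJ of size n x p):

       Mat C;
       ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);

   The numeric routine relies on the identity C = A*B = (B'*A')': it forms the two transposes,
   multiplies them with MatMatMult(), and transposes the result back into C.
*/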
EXTERN_C_BEGIN
#if defined(PETSC_HAVE_MUMPS)
extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_PASTIX)
extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SPOOLES)
extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
#endif
EXTERN_C_END

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJ()
M*/

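/*
   A typical way to create a matrix of this type (a sketch only; m, n, M, N, d_nz, and o_nz are
   hypothetical placeholders for the caller's sizes and preallocation estimates):

       Mat A;
       ierr = MatCreate(comm,&A);CHKERRQ(ierr);
       ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
       ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
       ierr = MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
       /* ... MatSetValues() calls ... */
       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   Alternatively, -mat_type mpiaij selects this type through MatSetFromOptions().
*/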
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);

  ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->rmap->bs  = 1;
  B->assembled = PETSC_FALSE;
  B->mapping   = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_spooles_C",
                                     "MatGetFactor_mpiaij_spooles",
                                     MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_mumps_C",
                                     "MatGetFactor_mpiaij_mumps",
                                     MatGetFactor_mpiaij_mumps);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_PASTIX)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_pastix_C",
                                     "MatGetFactor_mpiaij_pastix",
                                     MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_superlu_dist_C",
                                     "MatGetFactor_mpiaij_superlu_dist",
                                     MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIAIJ",
                                     MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIAIJ",
                                     MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIAIJ",
                                     MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                     "MatIsTranspose_MPIAIJ",
                                     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                     "MatMPIAIJSetPreallocation_MPIAIJ",
                                     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
                                     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
                                     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                     "MatDiagonalScaleLocal_MPIAIJ",
                                     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
                                     "MatConvert_MPIAIJ_MPICSRPERM",
                                     MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
                                     "MatConvert_MPIAIJ_MPICRL",
                                     MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
                                     "MatMatMult_MPIDense_MPIAIJ",
                                     MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
                                     "MatMatMultSymbolic_MPIDense_MPIAIJ",
                                     MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
                                     "MatMatMultNumeric_MPIDense_MPIAIJ",
                                     MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@
     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
     and "off-diagonal" parts of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - number of local columns; this should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it calculated
       if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices for "diagonal" portion of matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.

       The i and j indices are 0 based

       See MatCreateMPIAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix


.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
                                                                 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (oi[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  }
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;
  maij->donotstash     = PETSC_TRUE;
  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscMapSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
  ierr = PetscMapSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
  ierr = PetscMapSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscMapSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
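/*
   A minimal usage sketch of MatCreateMPIAIJWithSplitArrays() (the CSR arrays i, j, a and
   oi, oj, oa below are hypothetical and must describe the local "diagonal" and "off-diagonal"
   blocks as explained in the manual page above):

       Mat A;
       ierr = MatCreateMPIAIJWithSplitArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                             i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);

   Because the arrays are not copied, they must remain valid for the lifetime of the matrix.
*/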

/*
    Special version for direct calls from Fortran
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
#undef SETERRQ2
#define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
#undef SETERRQ
#define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  ierr = MatPreallocated(mat);CHKERRQ(ierr);
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  }
#endif
  {
    PetscInt   i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscTruth roworiented = aij->roworiented;

    /* Some variables required by the MatSetValues_SeqAIJ_*_Private() macros */
    Mat        A      = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa    = a->a;
    PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
    Mat        B      = aij->B;
    Mat_SeqAIJ *b     = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba    = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          if (in[j] >= cstart && in[j] < cend){
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined (PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                ba    = b->a;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          }
        }
      } else {
        if (!aij->donotstash) {
          if (roworiented) {
            if (ignorezeroentries && v[i*n] == 0.0) continue;
            ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
          } else {
            if (ignorezeroentries && v[i] == 0.0) continue;
            ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
EXTERN_C_END