1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 4 #include "../src/inline/spops.h" 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "MatDistribute_MPIAIJ" 8 /* 9 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 10 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 11 12 Only for square matrices 13 */ 14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 15 { 16 PetscMPIInt rank,size; 17 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 18 PetscErrorCode ierr; 19 Mat mat; 20 Mat_SeqAIJ *gmata; 21 PetscMPIInt tag; 22 MPI_Status status; 23 PetscTruth aij; 24 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 25 26 PetscFunctionBegin; 27 CHKMEMQ; 28 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 29 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 30 if (!rank) { 31 ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 32 if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 33 } 34 if (reuse == MAT_INITIAL_MATRIX) { 35 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 36 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 37 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 38 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 39 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 40 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 41 rowners[0] = 0; 42 for (i=2; i<=size; i++) { 43 rowners[i] += rowners[i-1]; 44 } 45 rstart = rowners[rank]; 46 rend = rowners[rank+1]; 47 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 48 if (!rank) { 49 gmata = (Mat_SeqAIJ*) gmat->data; 50 /* send row lengths to all processors */ 51 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 52 for (i=1; i<size; i++) { 53 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 54 } 55 /* determine number diagonal and off-diagonal counts */ 56 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 57 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 58 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 59 jj = 0; 60 for (i=0; i<m; i++) { 61 for (j=0; j<dlens[i]; j++) { 62 if (gmata->j[jj] < rstart) ld[i]++; 63 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 64 jj++; 65 } 66 } 67 /* send column indices to other processes */ 68 for (i=1; i<size; i++) { 69 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 70 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 71 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 72 } 73 74 /* send numerical values to other processes */ 75 for (i=1; i<size; i++) { 76 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 77 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 78 } 79 gmataa = gmata->a; 80 gmataj = gmata->j; 81 82 } else { 83 /* receive row lengths */ 84 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 85 /* receive column indices */ 86 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 87 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 88 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 89 /* determine number diagonal and off-diagonal counts */ 90 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 91 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 92 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 93 jj = 0; 94 for (i=0; i<m; i++) { 95 for (j=0; j<dlens[i]; j++) { 96 if (gmataj[jj] < rstart) ld[i]++; 97 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 98 jj++; 99 } 100 } 101 /* receive numerical values */ 102 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 104 } 105 /* set preallocation */ 106 for (i=0; i<m; i++) { 107 dlens[i] -= olens[i]; 108 } 109 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 110 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 111 112 for (i=0; i<m; i++) { 113 dlens[i] += olens[i]; 114 } 115 cnt = 0; 116 for (i=0; i<m; i++) { 117 row = rstart + i; 118 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 119 cnt += dlens[i]; 120 } 121 if (rank) { 122 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 123 } 124 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 125 ierr = PetscFree(rowners);CHKERRQ(ierr); 126 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 127 *inmat = mat; 128 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 129 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 130 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 131 mat = *inmat; 132 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 133 if (!rank) { 134 /* send numerical values to other processes */ 135 gmata = (Mat_SeqAIJ*) gmat->data; 136 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 137 gmataa = gmata->a; 138 for (i=1; i<size; i++) { 139 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 140 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 141 } 142 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 143 } else { 144 /* receive numerical values from process 0*/ 145 nz = Ad->nz + Ao->nz; 146 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 147 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 148 } 149 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 150 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 151 ad = Ad->a; 152 ao = Ao->a; 153 if (mat->rmap->n) { 154 i = 0; 155 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 156 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 157 } 158 for (i=1; i<mat->rmap->n; i++) { 159 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 160 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 161 } 162 i--; 163 if (mat->rmap->n) { 164 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 165 } 166 if (rank) { 167 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 168 } 169 } 170 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 171 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 172 CHKMEMQ; 173 PetscFunctionReturn(0); 174 } 175 176 /* 177 Local utility routine that creates a mapping from the global column 178 number to the local number in the off-diagonal part of the local 179 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 180 a slightly higher hash table cost; without it it is not scalable (each processor 181 has an order N integer array but is fast to acess. 182 */ 183 #undef __FUNCT__ 184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 186 { 187 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 188 PetscErrorCode ierr; 189 PetscInt n = aij->B->cmap->n,i; 190 191 PetscFunctionBegin; 192 #if defined (PETSC_USE_CTABLE) 193 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 194 for (i=0; i<n; i++){ 195 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 196 } 197 #else 198 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 199 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 200 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 201 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 202 #endif 203 PetscFunctionReturn(0); 204 } 205 206 207 #define CHUNKSIZE 15 208 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 209 { \ 210 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 211 lastcol1 = col;\ 212 while (high1-low1 > 5) { \ 213 t = (low1+high1)/2; \ 214 if (rp1[t] > col) high1 = t; \ 215 else low1 = t; \ 216 } \ 217 for (_i=low1; _i<high1; _i++) { \ 218 if (rp1[_i] > col) break; \ 219 if (rp1[_i] == col) { \ 220 if (addv == ADD_VALUES) ap1[_i] += value; \ 221 else ap1[_i] = value; \ 222 goto a_noinsert; \ 223 } \ 224 } \ 225 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 226 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 227 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 228 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 229 N = nrow1++ - 1; a->nz++; high1++; \ 230 /* shift up all the later entries in this row */ \ 231 for (ii=N; ii>=_i; ii--) { \ 232 rp1[ii+1] = rp1[ii]; \ 233 ap1[ii+1] = ap1[ii]; \ 234 } \ 235 rp1[_i] = col; \ 236 ap1[_i] = value; \ 237 a_noinsert: ; \ 238 ailen[row] = nrow1; \ 239 } 240 241 242 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 243 { \ 244 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 245 lastcol2 = col;\ 246 while (high2-low2 > 5) { \ 247 t = (low2+high2)/2; \ 248 if (rp2[t] > col) high2 = t; \ 249 else low2 = t; \ 250 } \ 251 for (_i=low2; _i<high2; _i++) { \ 252 if (rp2[_i] > col) break; \ 253 if (rp2[_i] == col) { \ 254 if (addv == ADD_VALUES) ap2[_i] += value; \ 255 else ap2[_i] = value; \ 256 goto b_noinsert; \ 257 } \ 258 } \ 259 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 260 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 261 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 262 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 263 N = nrow2++ - 1; b->nz++; high2++; \ 264 /* shift up all the later entries in this row */ \ 265 for (ii=N; ii>=_i; ii--) { \ 266 rp2[ii+1] = rp2[ii]; \ 267 ap2[ii+1] = ap2[ii]; \ 268 } \ 269 rp2[_i] = col; \ 270 ap2[_i] = value; \ 271 b_noinsert: ; \ 272 bilen[row] = nrow2; \ 273 } 274 275 #undef __FUNCT__ 276 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 277 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 278 { 279 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 280 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 281 PetscErrorCode ierr; 282 PetscInt l,*garray = mat->garray,diag; 283 284 PetscFunctionBegin; 285 /* code only works for square matrices A */ 286 287 /* find size of row to the left of the diagonal part */ 288 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 289 row = row - diag; 290 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 291 if (garray[b->j[b->i[row]+l]] > diag) break; 292 } 293 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 294 295 /* diagonal part */ 296 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 297 298 /* right of diagonal part */ 299 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 300 PetscFunctionReturn(0); 301 } 302 303 #undef __FUNCT__ 304 #define __FUNCT__ "MatSetValues_MPIAIJ" 305 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 306 { 307 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 308 PetscScalar value; 309 PetscErrorCode ierr; 310 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 311 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 312 PetscTruth roworiented = aij->roworiented; 313 314 /* Some Variables required in the macro */ 315 Mat A = aij->A; 316 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 317 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 318 MatScalar *aa = a->a; 319 PetscTruth ignorezeroentries = a->ignorezeroentries; 320 Mat B = aij->B; 321 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 322 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 323 MatScalar *ba = b->a; 324 325 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 326 PetscInt nonew = a->nonew; 327 MatScalar *ap1,*ap2; 328 329 PetscFunctionBegin; 330 for (i=0; i<m; i++) { 331 if (im[i] < 0) continue; 332 #if defined(PETSC_USE_DEBUG) 333 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 334 #endif 335 if (im[i] >= rstart && im[i] < rend) { 336 row = im[i] - rstart; 337 lastcol1 = -1; 338 rp1 = aj + ai[row]; 339 ap1 = aa + ai[row]; 340 rmax1 = aimax[row]; 341 nrow1 = ailen[row]; 342 low1 = 0; 343 high1 = nrow1; 344 lastcol2 = -1; 345 rp2 = bj + bi[row]; 346 ap2 = ba + bi[row]; 347 rmax2 = bimax[row]; 348 nrow2 = bilen[row]; 349 low2 = 0; 350 high2 = nrow2; 351 352 for (j=0; j<n; j++) { 353 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 354 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 355 if (in[j] >= cstart && in[j] < cend){ 356 col = in[j] - cstart; 357 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 358 } else if (in[j] < 0) continue; 359 #if defined(PETSC_USE_DEBUG) 360 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 361 #endif 362 else { 363 if (mat->was_assembled) { 364 if (!aij->colmap) { 365 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 366 } 367 #if defined (PETSC_USE_CTABLE) 368 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 369 col--; 370 #else 371 col = aij->colmap[in[j]] - 1; 372 #endif 373 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 374 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 375 col = in[j]; 376 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 377 B = aij->B; 378 b = (Mat_SeqAIJ*)B->data; 379 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 380 rp2 = bj + bi[row]; 381 ap2 = ba + bi[row]; 382 rmax2 = bimax[row]; 383 nrow2 = bilen[row]; 384 low2 = 0; 385 high2 = nrow2; 386 bm = aij->B->rmap->n; 387 ba = b->a; 388 } 389 } else col = in[j]; 390 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 391 } 392 } 393 } else { 394 if (!aij->donotstash) { 395 if (roworiented) { 396 if (ignorezeroentries && v[i*n] == 0.0) continue; 397 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 398 } else { 399 if (ignorezeroentries && v[i] == 0.0) continue; 400 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 401 } 402 } 403 } 404 } 405 PetscFunctionReturn(0); 406 } 407 408 #undef __FUNCT__ 409 #define __FUNCT__ "MatGetValues_MPIAIJ" 410 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 411 { 412 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 413 PetscErrorCode ierr; 414 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 415 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 416 417 PetscFunctionBegin; 418 for (i=0; i<m; i++) { 419 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 420 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 421 if (idxm[i] >= rstart && idxm[i] < rend) { 422 row = idxm[i] - rstart; 423 for (j=0; j<n; j++) { 424 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 425 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 426 if (idxn[j] >= cstart && idxn[j] < cend){ 427 col = idxn[j] - cstart; 428 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 429 } else { 430 if (!aij->colmap) { 431 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 432 } 433 #if defined (PETSC_USE_CTABLE) 434 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 435 col --; 436 #else 437 col = aij->colmap[idxn[j]] - 1; 438 #endif 439 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 440 else { 441 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 442 } 443 } 444 } 445 } else { 446 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 447 } 448 } 449 PetscFunctionReturn(0); 450 } 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 455 { 456 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 457 PetscErrorCode ierr; 458 PetscInt nstash,reallocs; 459 InsertMode addv; 460 461 PetscFunctionBegin; 462 if (aij->donotstash) { 463 PetscFunctionReturn(0); 464 } 465 466 /* make sure all processors are either in INSERTMODE or ADDMODE */ 467 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 468 if (addv == (ADD_VALUES|INSERT_VALUES)) { 469 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 470 } 471 mat->insertmode = addv; /* in case this processor had no cache */ 472 473 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 474 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 475 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 476 PetscFunctionReturn(0); 477 } 478 479 #undef __FUNCT__ 480 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 481 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 482 { 483 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 484 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 485 PetscErrorCode ierr; 486 PetscMPIInt n; 487 PetscInt i,j,rstart,ncols,flg; 488 PetscInt *row,*col; 489 PetscTruth other_disassembled; 490 PetscScalar *val; 491 InsertMode addv = mat->insertmode; 492 493 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 494 PetscFunctionBegin; 495 if (!aij->donotstash) { 496 while (1) { 497 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 498 if (!flg) break; 499 500 for (i=0; i<n;) { 501 /* Now identify the consecutive vals belonging to the same row */ 502 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 503 if (j < n) ncols = j-i; 504 else ncols = n-i; 505 /* Now assemble all these values with a single function call */ 506 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 507 i = j; 508 } 509 } 510 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 511 } 512 a->compressedrow.use = PETSC_FALSE; 513 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 514 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 515 516 /* determine if any processor has disassembled, if so we must 517 also disassemble ourselfs, in order that we may reassemble. */ 518 /* 519 if nonzero structure of submatrix B cannot change then we know that 520 no processor disassembled thus we can skip this stuff 521 */ 522 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 523 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 524 if (mat->was_assembled && !other_disassembled) { 525 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 526 } 527 } 528 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 529 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 530 } 531 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 532 ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 533 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 534 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 535 536 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 537 aij->rowvalues = 0; 538 539 /* used by MatAXPY() */ 540 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 541 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 542 543 PetscFunctionReturn(0); 544 } 545 546 #undef __FUNCT__ 547 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 548 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 549 { 550 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 551 PetscErrorCode ierr; 552 553 PetscFunctionBegin; 554 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 555 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 556 PetscFunctionReturn(0); 557 } 558 559 #undef __FUNCT__ 560 #define __FUNCT__ "MatZeroRows_MPIAIJ" 561 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 562 { 563 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 564 PetscErrorCode ierr; 565 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 566 PetscInt i,*owners = A->rmap->range; 567 PetscInt *nprocs,j,idx,nsends,row; 568 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 569 PetscInt *rvalues,count,base,slen,*source; 570 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 571 MPI_Comm comm = ((PetscObject)A)->comm; 572 MPI_Request *send_waits,*recv_waits; 573 MPI_Status recv_status,*send_status; 574 #if defined(PETSC_DEBUG) 575 PetscTruth found = PETSC_FALSE; 576 #endif 577 578 PetscFunctionBegin; 579 /* first count number of contributors to each processor */ 580 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 581 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 582 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 583 j = 0; 584 for (i=0; i<N; i++) { 585 if (lastidx > (idx = rows[i])) j = 0; 586 lastidx = idx; 587 for (; j<size; j++) { 588 if (idx >= owners[j] && idx < owners[j+1]) { 589 nprocs[2*j]++; 590 nprocs[2*j+1] = 1; 591 owner[i] = j; 592 #if defined(PETSC_DEBUG) 593 found = PETSC_TRUE; 594 #endif 595 break; 596 } 597 } 598 #if defined(PETSC_DEBUG) 599 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 600 found = PETSC_FALSE; 601 #endif 602 } 603 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 604 605 /* inform other processors of number of messages and max length*/ 606 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 607 608 /* post receives: */ 609 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 610 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 611 for (i=0; i<nrecvs; i++) { 612 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 613 } 614 615 /* do sends: 616 1) starts[i] gives the starting index in svalues for stuff going to 617 the ith processor 618 */ 619 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 620 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 621 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 622 starts[0] = 0; 623 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 624 for (i=0; i<N; i++) { 625 svalues[starts[owner[i]]++] = rows[i]; 626 } 627 628 starts[0] = 0; 629 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 630 count = 0; 631 for (i=0; i<size; i++) { 632 if (nprocs[2*i+1]) { 633 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 634 } 635 } 636 ierr = PetscFree(starts);CHKERRQ(ierr); 637 638 base = owners[rank]; 639 640 /* wait on receives */ 641 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 642 source = lens + nrecvs; 643 count = nrecvs; slen = 0; 644 while (count) { 645 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 646 /* unpack receives into our local space */ 647 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 648 source[imdex] = recv_status.MPI_SOURCE; 649 lens[imdex] = n; 650 slen += n; 651 count--; 652 } 653 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 654 655 /* move the data into the send scatter */ 656 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 657 count = 0; 658 for (i=0; i<nrecvs; i++) { 659 values = rvalues + i*nmax; 660 for (j=0; j<lens[i]; j++) { 661 lrows[count++] = values[j] - base; 662 } 663 } 664 ierr = PetscFree(rvalues);CHKERRQ(ierr); 665 ierr = PetscFree(lens);CHKERRQ(ierr); 666 ierr = PetscFree(owner);CHKERRQ(ierr); 667 ierr = PetscFree(nprocs);CHKERRQ(ierr); 668 669 /* actually zap the local rows */ 670 /* 671 Zero the required rows. If the "diagonal block" of the matrix 672 is square and the user wishes to set the diagonal we use separate 673 code so that MatSetValues() is not called for each diagonal allocating 674 new memory, thus calling lots of mallocs and slowing things down. 675 676 Contributed by: Matthew Knepley 677 */ 678 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 679 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 680 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 681 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 682 } else if (diag != 0.0) { 683 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 684 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 685 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 686 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 687 } 688 for (i = 0; i < slen; i++) { 689 row = lrows[i] + rstart; 690 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 691 } 692 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 693 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 694 } else { 695 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 696 } 697 ierr = PetscFree(lrows);CHKERRQ(ierr); 698 699 /* wait on sends */ 700 if (nsends) { 701 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 702 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 703 ierr = PetscFree(send_status);CHKERRQ(ierr); 704 } 705 ierr = PetscFree(send_waits);CHKERRQ(ierr); 706 ierr = PetscFree(svalues);CHKERRQ(ierr); 707 708 PetscFunctionReturn(0); 709 } 710 711 #undef __FUNCT__ 712 #define __FUNCT__ "MatMult_MPIAIJ" 713 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 714 { 715 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 716 PetscErrorCode ierr; 717 PetscInt nt; 718 719 PetscFunctionBegin; 720 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 721 if (nt != A->cmap->n) { 722 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 723 } 724 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 725 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 726 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 727 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 728 PetscFunctionReturn(0); 729 } 730 731 #undef __FUNCT__ 732 #define __FUNCT__ "MatMultAdd_MPIAIJ" 733 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 734 { 735 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 736 PetscErrorCode ierr; 737 738 PetscFunctionBegin; 739 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 740 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 741 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 742 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 743 PetscFunctionReturn(0); 744 } 745 746 #undef __FUNCT__ 747 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 748 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 749 { 750 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 751 PetscErrorCode ierr; 752 PetscTruth merged; 753 754 PetscFunctionBegin; 755 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 756 /* do nondiagonal part */ 757 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 758 if (!merged) { 759 /* send it on its way */ 760 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 761 /* do local part */ 762 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 763 /* receive remote parts: note this assumes the values are not actually */ 764 /* added in yy until the next line, */ 765 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 766 } else { 767 /* do local part */ 768 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 769 /* send it on its way */ 770 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 771 /* values actually were received in the Begin() but we need to call this nop */ 772 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 773 } 774 PetscFunctionReturn(0); 775 } 776 777 EXTERN_C_BEGIN 778 #undef __FUNCT__ 779 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 780 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 781 { 782 MPI_Comm comm; 783 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 784 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 785 IS Me,Notme; 786 PetscErrorCode ierr; 787 PetscInt M,N,first,last,*notme,i; 788 PetscMPIInt size; 789 790 PetscFunctionBegin; 791 792 /* Easy test: symmetric diagonal block */ 793 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 794 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 795 if (!*f) PetscFunctionReturn(0); 796 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 797 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 798 if (size == 1) PetscFunctionReturn(0); 799 800 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 801 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 802 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 803 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 804 for (i=0; i<first; i++) notme[i] = i; 805 for (i=last; i<M; i++) notme[i-last+first] = i; 806 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 807 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 808 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 809 Aoff = Aoffs[0]; 810 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 811 Boff = Boffs[0]; 812 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 813 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 814 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 815 ierr = ISDestroy(Me);CHKERRQ(ierr); 816 ierr = ISDestroy(Notme);CHKERRQ(ierr); 817 818 PetscFunctionReturn(0); 819 } 820 EXTERN_C_END 821 822 #undef __FUNCT__ 823 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 824 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 825 { 826 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 827 PetscErrorCode ierr; 828 829 PetscFunctionBegin; 830 /* do nondiagonal part */ 831 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 832 /* send it on its way */ 833 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 834 /* do local part */ 835 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 836 /* receive remote parts */ 837 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 838 PetscFunctionReturn(0); 839 } 840 841 /* 842 This only works correctly for square matrices where the subblock A->A is the 843 diagonal block 844 */ 845 #undef __FUNCT__ 846 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 847 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 848 { 849 PetscErrorCode ierr; 850 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 851 852 PetscFunctionBegin; 853 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 854 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) { 855 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 856 } 857 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 858 PetscFunctionReturn(0); 859 } 860 861 #undef __FUNCT__ 862 #define __FUNCT__ "MatScale_MPIAIJ" 863 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 864 { 865 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 866 PetscErrorCode ierr; 867 868 PetscFunctionBegin; 869 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 870 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 871 PetscFunctionReturn(0); 872 } 873 874 #undef __FUNCT__ 875 #define __FUNCT__ "MatDestroy_MPIAIJ" 876 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 877 { 878 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 879 PetscErrorCode ierr; 880 881 PetscFunctionBegin; 882 #if defined(PETSC_USE_LOG) 883 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 884 #endif 885 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 886 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 887 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 888 #if defined (PETSC_USE_CTABLE) 889 if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);} 890 #else 891 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 892 #endif 893 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 894 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 895 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 896 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 897 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 898 ierr = PetscFree(aij);CHKERRQ(ierr); 899 900 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 901 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 902 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 903 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 904 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 905 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 906 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 907 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 908 PetscFunctionReturn(0); 909 } 910 911 #undef __FUNCT__ 912 #define __FUNCT__ "MatView_MPIAIJ_Binary" 913 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 914 { 915 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 916 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 917 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 918 PetscErrorCode ierr; 919 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 920 int fd; 921 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 922 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 923 PetscScalar *column_values; 924 925 PetscFunctionBegin; 926 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 927 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 928 nz = A->nz + B->nz; 929 if (!rank) { 930 header[0] = MAT_FILE_COOKIE; 931 header[1] = mat->rmap->N; 932 header[2] = mat->cmap->N; 933 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 934 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 935 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 936 /* get largest number of rows any processor has */ 937 rlen = mat->rmap->n; 938 range = mat->rmap->range; 939 for (i=1; i<size; i++) { 940 rlen = PetscMax(rlen,range[i+1] - range[i]); 941 } 942 } else { 943 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 944 rlen = mat->rmap->n; 945 } 946 947 /* load up the local row counts */ 948 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 949 for (i=0; i<mat->rmap->n; i++) { 950 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 951 } 952 953 /* store the row lengths to the file */ 954 if (!rank) { 955 MPI_Status status; 956 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 957 for (i=1; i<size; i++) { 958 rlen = range[i+1] - range[i]; 959 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 960 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 961 } 962 } else { 963 ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 964 } 965 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 966 967 /* load up the local column indices */ 968 nzmax = nz; /* )th processor needs space a largest processor needs */ 969 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 970 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 971 cnt = 0; 972 for (i=0; i<mat->rmap->n; i++) { 973 for (j=B->i[i]; j<B->i[i+1]; j++) { 974 if ( (col = garray[B->j[j]]) > cstart) break; 975 column_indices[cnt++] = col; 976 } 977 for (k=A->i[i]; k<A->i[i+1]; k++) { 978 column_indices[cnt++] = A->j[k] + cstart; 979 } 980 for (; j<B->i[i+1]; j++) { 981 column_indices[cnt++] = garray[B->j[j]]; 982 } 983 } 984 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 985 986 /* store the column indices to the file */ 987 if (!rank) { 988 MPI_Status status; 989 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 990 for (i=1; i<size; i++) { 991 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 992 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 993 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 994 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 995 } 996 } else { 997 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 998 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 999 } 1000 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1001 1002 /* load up the local column values */ 1003 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1004 cnt = 0; 1005 for (i=0; i<mat->rmap->n; i++) { 1006 for (j=B->i[i]; j<B->i[i+1]; j++) { 1007 if ( garray[B->j[j]] > cstart) break; 1008 column_values[cnt++] = B->a[j]; 1009 } 1010 for (k=A->i[i]; k<A->i[i+1]; k++) { 1011 column_values[cnt++] = A->a[k]; 1012 } 1013 for (; j<B->i[i+1]; j++) { 1014 column_values[cnt++] = B->a[j]; 1015 } 1016 } 1017 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1018 1019 /* store the column values to the file */ 1020 if (!rank) { 1021 MPI_Status status; 1022 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1023 for (i=1; i<size; i++) { 1024 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1025 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1026 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1027 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1028 } 1029 } else { 1030 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1031 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1032 } 1033 ierr = PetscFree(column_values);CHKERRQ(ierr); 1034 PetscFunctionReturn(0); 1035 } 1036 1037 #undef __FUNCT__ 1038 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1039 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1040 { 1041 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1042 PetscErrorCode ierr; 1043 PetscMPIInt rank = aij->rank,size = aij->size; 1044 PetscTruth isdraw,iascii,isbinary; 1045 PetscViewer sviewer; 1046 PetscViewerFormat format; 1047 1048 PetscFunctionBegin; 1049 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1050 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1051 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1052 if (iascii) { 1053 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1054 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1055 MatInfo info; 1056 PetscTruth inodes; 1057 1058 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1059 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1060 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1061 if (!inodes) { 1062 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1063 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1064 } else { 1065 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1066 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1067 } 1068 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1069 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1070 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1071 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1072 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1073 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1074 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1075 PetscFunctionReturn(0); 1076 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1077 PetscInt inodecount,inodelimit,*inodes; 1078 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1079 if (inodes) { 1080 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1081 } else { 1082 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1083 } 1084 PetscFunctionReturn(0); 1085 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1086 PetscFunctionReturn(0); 1087 } 1088 } else if (isbinary) { 1089 if (size == 1) { 1090 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1091 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1092 } else { 1093 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1094 } 1095 PetscFunctionReturn(0); 1096 } else if (isdraw) { 1097 PetscDraw draw; 1098 PetscTruth isnull; 1099 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1100 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1101 } 1102 1103 if (size == 1) { 1104 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1105 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1106 } else { 1107 /* assemble the entire matrix onto first processor. */ 1108 Mat A; 1109 Mat_SeqAIJ *Aloc; 1110 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1111 MatScalar *a; 1112 1113 if (mat->rmap->N > 1024) { 1114 PetscTruth flg = PETSC_FALSE; 1115 1116 ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1117 if (!flg) { 1118 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1119 } 1120 } 1121 1122 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1123 if (!rank) { 1124 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1125 } else { 1126 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1127 } 1128 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1129 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1130 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1131 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1132 1133 /* copy over the A part */ 1134 Aloc = (Mat_SeqAIJ*)aij->A->data; 1135 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1136 row = mat->rmap->rstart; 1137 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1138 for (i=0; i<m; i++) { 1139 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1140 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1141 } 1142 aj = Aloc->j; 1143 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1144 1145 /* copy over the B part */ 1146 Aloc = (Mat_SeqAIJ*)aij->B->data; 1147 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1148 row = mat->rmap->rstart; 1149 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1150 ct = cols; 1151 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1152 for (i=0; i<m; i++) { 1153 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1154 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1155 } 1156 ierr = PetscFree(ct);CHKERRQ(ierr); 1157 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1158 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1159 /* 1160 Everyone has to call to draw the matrix since the graphics waits are 1161 synchronized across all processors that share the PetscDraw object 1162 */ 1163 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1164 if (!rank) { 1165 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1166 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1167 } 1168 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1169 ierr = MatDestroy(A);CHKERRQ(ierr); 1170 } 1171 PetscFunctionReturn(0); 1172 } 1173 1174 #undef __FUNCT__ 1175 #define __FUNCT__ "MatView_MPIAIJ" 1176 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1177 { 1178 PetscErrorCode ierr; 1179 PetscTruth iascii,isdraw,issocket,isbinary; 1180 1181 PetscFunctionBegin; 1182 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1183 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1184 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1185 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1186 if (iascii || isdraw || isbinary || issocket) { 1187 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1188 } else { 1189 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1190 } 1191 PetscFunctionReturn(0); 1192 } 1193 1194 #undef __FUNCT__ 1195 #define __FUNCT__ "MatRelax_MPIAIJ" 1196 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1197 { 1198 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1199 PetscErrorCode ierr; 1200 Vec bb1; 1201 1202 PetscFunctionBegin; 1203 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1204 1205 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1206 if (flag & SOR_ZERO_INITIAL_GUESS) { 1207 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1208 its--; 1209 } 1210 1211 while (its--) { 1212 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1213 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1214 1215 /* update rhs: bb1 = bb - B*x */ 1216 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1217 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1218 1219 /* local sweep */ 1220 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 1221 } 1222 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1223 if (flag & SOR_ZERO_INITIAL_GUESS) { 1224 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1225 its--; 1226 } 1227 while (its--) { 1228 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1229 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1230 1231 /* update rhs: bb1 = bb - B*x */ 1232 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1233 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1234 1235 /* local sweep */ 1236 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1237 } 1238 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1239 if (flag & SOR_ZERO_INITIAL_GUESS) { 1240 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1241 its--; 1242 } 1243 while (its--) { 1244 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1245 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1246 1247 /* update rhs: bb1 = bb - B*x */ 1248 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1249 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1250 1251 /* local sweep */ 1252 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1253 } 1254 } else { 1255 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1256 } 1257 1258 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1259 PetscFunctionReturn(0); 1260 } 1261 1262 #undef __FUNCT__ 1263 #define __FUNCT__ "MatPermute_MPIAIJ" 1264 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1265 { 1266 MPI_Comm comm,pcomm; 1267 PetscInt first,local_size,nrows; 1268 const PetscInt *rows; 1269 PetscMPIInt size; 1270 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1271 PetscErrorCode ierr; 1272 1273 PetscFunctionBegin; 1274 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1275 /* make a collective version of 'rowp' */ 1276 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1277 if (pcomm==comm) { 1278 crowp = rowp; 1279 } else { 1280 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1281 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1282 ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr); 1283 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1284 } 1285 /* collect the global row permutation and invert it */ 1286 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1287 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1288 if (pcomm!=comm) { 1289 ierr = ISDestroy(crowp);CHKERRQ(ierr); 1290 } 1291 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1292 /* get the local target indices */ 1293 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1294 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 1295 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1296 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr); 1297 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1298 ierr = ISDestroy(irowp);CHKERRQ(ierr); 1299 /* the column permutation is so much easier; 1300 make a local version of 'colp' and invert it */ 1301 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 1302 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 1303 if (size==1) { 1304 lcolp = colp; 1305 } else { 1306 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 1307 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 1308 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr); 1309 } 1310 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1311 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1312 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 1313 if (size>1) { 1314 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 1315 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 1316 } 1317 /* now we just get the submatrix */ 1318 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1319 /* clean up */ 1320 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 1321 ierr = ISDestroy(icolp);CHKERRQ(ierr); 1322 PetscFunctionReturn(0); 1323 } 1324 1325 #undef __FUNCT__ 1326 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1327 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1328 { 1329 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1330 Mat A = mat->A,B = mat->B; 1331 PetscErrorCode ierr; 1332 PetscReal isend[5],irecv[5]; 1333 1334 PetscFunctionBegin; 1335 info->block_size = 1.0; 1336 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1337 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1338 isend[3] = info->memory; isend[4] = info->mallocs; 1339 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1340 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1341 isend[3] += info->memory; isend[4] += info->mallocs; 1342 if (flag == MAT_LOCAL) { 1343 info->nz_used = isend[0]; 1344 info->nz_allocated = isend[1]; 1345 info->nz_unneeded = isend[2]; 1346 info->memory = isend[3]; 1347 info->mallocs = isend[4]; 1348 } else if (flag == MAT_GLOBAL_MAX) { 1349 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1350 info->nz_used = irecv[0]; 1351 info->nz_allocated = irecv[1]; 1352 info->nz_unneeded = irecv[2]; 1353 info->memory = irecv[3]; 1354 info->mallocs = irecv[4]; 1355 } else if (flag == MAT_GLOBAL_SUM) { 1356 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1357 info->nz_used = irecv[0]; 1358 info->nz_allocated = irecv[1]; 1359 info->nz_unneeded = irecv[2]; 1360 info->memory = irecv[3]; 1361 info->mallocs = irecv[4]; 1362 } 1363 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1364 info->fill_ratio_needed = 0; 1365 info->factor_mallocs = 0; 1366 1367 PetscFunctionReturn(0); 1368 } 1369 1370 #undef __FUNCT__ 1371 #define __FUNCT__ "MatSetOption_MPIAIJ" 1372 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg) 1373 { 1374 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1375 PetscErrorCode ierr; 1376 1377 PetscFunctionBegin; 1378 switch (op) { 1379 case MAT_NEW_NONZERO_LOCATIONS: 1380 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1381 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1382 case MAT_KEEP_ZEROED_ROWS: 1383 case MAT_NEW_NONZERO_LOCATION_ERR: 1384 case MAT_USE_INODES: 1385 case MAT_IGNORE_ZERO_ENTRIES: 1386 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1387 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1388 break; 1389 case MAT_ROW_ORIENTED: 1390 a->roworiented = flg; 1391 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1392 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1393 break; 1394 case MAT_NEW_DIAGONALS: 1395 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1396 break; 1397 case MAT_IGNORE_OFF_PROC_ENTRIES: 1398 a->donotstash = PETSC_TRUE; 1399 break; 1400 case MAT_SYMMETRIC: 1401 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1402 break; 1403 case MAT_STRUCTURALLY_SYMMETRIC: 1404 case MAT_HERMITIAN: 1405 case MAT_SYMMETRY_ETERNAL: 1406 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1407 break; 1408 default: 1409 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1410 } 1411 PetscFunctionReturn(0); 1412 } 1413 1414 #undef __FUNCT__ 1415 #define __FUNCT__ "MatGetRow_MPIAIJ" 1416 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1417 { 1418 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1419 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1420 PetscErrorCode ierr; 1421 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1422 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1423 PetscInt *cmap,*idx_p; 1424 1425 PetscFunctionBegin; 1426 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1427 mat->getrowactive = PETSC_TRUE; 1428 1429 if (!mat->rowvalues && (idx || v)) { 1430 /* 1431 allocate enough space to hold information from the longest row. 1432 */ 1433 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1434 PetscInt max = 1,tmp; 1435 for (i=0; i<matin->rmap->n; i++) { 1436 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1437 if (max < tmp) { max = tmp; } 1438 } 1439 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1440 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1441 } 1442 1443 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1444 lrow = row - rstart; 1445 1446 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1447 if (!v) {pvA = 0; pvB = 0;} 1448 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1449 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1450 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1451 nztot = nzA + nzB; 1452 1453 cmap = mat->garray; 1454 if (v || idx) { 1455 if (nztot) { 1456 /* Sort by increasing column numbers, assuming A and B already sorted */ 1457 PetscInt imark = -1; 1458 if (v) { 1459 *v = v_p = mat->rowvalues; 1460 for (i=0; i<nzB; i++) { 1461 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1462 else break; 1463 } 1464 imark = i; 1465 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1466 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1467 } 1468 if (idx) { 1469 *idx = idx_p = mat->rowindices; 1470 if (imark > -1) { 1471 for (i=0; i<imark; i++) { 1472 idx_p[i] = cmap[cworkB[i]]; 1473 } 1474 } else { 1475 for (i=0; i<nzB; i++) { 1476 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1477 else break; 1478 } 1479 imark = i; 1480 } 1481 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1482 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1483 } 1484 } else { 1485 if (idx) *idx = 0; 1486 if (v) *v = 0; 1487 } 1488 } 1489 *nz = nztot; 1490 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1491 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1492 PetscFunctionReturn(0); 1493 } 1494 1495 #undef __FUNCT__ 1496 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1497 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1498 { 1499 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1500 1501 PetscFunctionBegin; 1502 if (!aij->getrowactive) { 1503 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1504 } 1505 aij->getrowactive = PETSC_FALSE; 1506 PetscFunctionReturn(0); 1507 } 1508 1509 #undef __FUNCT__ 1510 #define __FUNCT__ "MatNorm_MPIAIJ" 1511 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1512 { 1513 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1514 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1515 PetscErrorCode ierr; 1516 PetscInt i,j,cstart = mat->cmap->rstart; 1517 PetscReal sum = 0.0; 1518 MatScalar *v; 1519 1520 PetscFunctionBegin; 1521 if (aij->size == 1) { 1522 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1523 } else { 1524 if (type == NORM_FROBENIUS) { 1525 v = amat->a; 1526 for (i=0; i<amat->nz; i++) { 1527 #if defined(PETSC_USE_COMPLEX) 1528 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1529 #else 1530 sum += (*v)*(*v); v++; 1531 #endif 1532 } 1533 v = bmat->a; 1534 for (i=0; i<bmat->nz; i++) { 1535 #if defined(PETSC_USE_COMPLEX) 1536 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1537 #else 1538 sum += (*v)*(*v); v++; 1539 #endif 1540 } 1541 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1542 *norm = sqrt(*norm); 1543 } else if (type == NORM_1) { /* max column norm */ 1544 PetscReal *tmp,*tmp2; 1545 PetscInt *jj,*garray = aij->garray; 1546 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1547 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1548 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1549 *norm = 0.0; 1550 v = amat->a; jj = amat->j; 1551 for (j=0; j<amat->nz; j++) { 1552 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1553 } 1554 v = bmat->a; jj = bmat->j; 1555 for (j=0; j<bmat->nz; j++) { 1556 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1557 } 1558 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1559 for (j=0; j<mat->cmap->N; j++) { 1560 if (tmp2[j] > *norm) *norm = tmp2[j]; 1561 } 1562 ierr = PetscFree(tmp);CHKERRQ(ierr); 1563 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1564 } else if (type == NORM_INFINITY) { /* max row norm */ 1565 PetscReal ntemp = 0.0; 1566 for (j=0; j<aij->A->rmap->n; j++) { 1567 v = amat->a + amat->i[j]; 1568 sum = 0.0; 1569 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1570 sum += PetscAbsScalar(*v); v++; 1571 } 1572 v = bmat->a + bmat->i[j]; 1573 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1574 sum += PetscAbsScalar(*v); v++; 1575 } 1576 if (sum > ntemp) ntemp = sum; 1577 } 1578 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1579 } else { 1580 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1581 } 1582 } 1583 PetscFunctionReturn(0); 1584 } 1585 1586 #undef __FUNCT__ 1587 #define __FUNCT__ "MatTranspose_MPIAIJ" 1588 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1589 { 1590 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1591 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1592 PetscErrorCode ierr; 1593 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1594 PetscInt cstart=A->cmap->rstart,ncol; 1595 Mat B; 1596 MatScalar *array; 1597 1598 PetscFunctionBegin; 1599 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1600 1601 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1602 ai = Aloc->i; aj = Aloc->j; 1603 bi = Bloc->i; bj = Bloc->j; 1604 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1605 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1606 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1607 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1608 for (i=0; i<ai[ma]; i++){ 1609 d_nnz[aj[i]] ++; 1610 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1611 } 1612 1613 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1614 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1615 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1616 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1617 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1618 } else { 1619 B = *matout; 1620 } 1621 1622 /* copy over the A part */ 1623 array = Aloc->a; 1624 row = A->rmap->rstart; 1625 for (i=0; i<ma; i++) { 1626 ncol = ai[i+1]-ai[i]; 1627 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1628 row++; array += ncol; aj += ncol; 1629 } 1630 aj = Aloc->j; 1631 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1632 1633 /* copy over the B part */ 1634 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1635 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1636 array = Bloc->a; 1637 row = A->rmap->rstart; 1638 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1639 cols_tmp = cols; 1640 for (i=0; i<mb; i++) { 1641 ncol = bi[i+1]-bi[i]; 1642 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1643 row++; array += ncol; cols_tmp += ncol; 1644 } 1645 ierr = PetscFree(cols);CHKERRQ(ierr); 1646 1647 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1648 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1649 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1650 *matout = B; 1651 } else { 1652 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1653 } 1654 PetscFunctionReturn(0); 1655 } 1656 1657 #undef __FUNCT__ 1658 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1659 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1660 { 1661 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1662 Mat a = aij->A,b = aij->B; 1663 PetscErrorCode ierr; 1664 PetscInt s1,s2,s3; 1665 1666 PetscFunctionBegin; 1667 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1668 if (rr) { 1669 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1670 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1671 /* Overlap communication with computation. */ 1672 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1673 } 1674 if (ll) { 1675 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1676 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1677 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1678 } 1679 /* scale the diagonal block */ 1680 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1681 1682 if (rr) { 1683 /* Do a scatter end and then right scale the off-diagonal block */ 1684 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1685 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1686 } 1687 1688 PetscFunctionReturn(0); 1689 } 1690 1691 #undef __FUNCT__ 1692 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1693 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1694 { 1695 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1696 PetscErrorCode ierr; 1697 1698 PetscFunctionBegin; 1699 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1700 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1701 PetscFunctionReturn(0); 1702 } 1703 #undef __FUNCT__ 1704 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1705 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1706 { 1707 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1708 PetscErrorCode ierr; 1709 1710 PetscFunctionBegin; 1711 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1712 PetscFunctionReturn(0); 1713 } 1714 1715 #undef __FUNCT__ 1716 #define __FUNCT__ "MatEqual_MPIAIJ" 1717 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1718 { 1719 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1720 Mat a,b,c,d; 1721 PetscTruth flg; 1722 PetscErrorCode ierr; 1723 1724 PetscFunctionBegin; 1725 a = matA->A; b = matA->B; 1726 c = matB->A; d = matB->B; 1727 1728 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1729 if (flg) { 1730 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1731 } 1732 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1733 PetscFunctionReturn(0); 1734 } 1735 1736 #undef __FUNCT__ 1737 #define __FUNCT__ "MatCopy_MPIAIJ" 1738 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1739 { 1740 PetscErrorCode ierr; 1741 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1742 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1743 1744 PetscFunctionBegin; 1745 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1746 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1747 /* because of the column compression in the off-processor part of the matrix a->B, 1748 the number of columns in a->B and b->B may be different, hence we cannot call 1749 the MatCopy() directly on the two parts. If need be, we can provide a more 1750 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1751 then copying the submatrices */ 1752 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1753 } else { 1754 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1755 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1756 } 1757 PetscFunctionReturn(0); 1758 } 1759 1760 #undef __FUNCT__ 1761 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1762 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1763 { 1764 PetscErrorCode ierr; 1765 1766 PetscFunctionBegin; 1767 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1768 PetscFunctionReturn(0); 1769 } 1770 1771 #include "petscblaslapack.h" 1772 #undef __FUNCT__ 1773 #define __FUNCT__ "MatAXPY_MPIAIJ" 1774 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1775 { 1776 PetscErrorCode ierr; 1777 PetscInt i; 1778 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1779 PetscBLASInt bnz,one=1; 1780 Mat_SeqAIJ *x,*y; 1781 1782 PetscFunctionBegin; 1783 if (str == SAME_NONZERO_PATTERN) { 1784 PetscScalar alpha = a; 1785 x = (Mat_SeqAIJ *)xx->A->data; 1786 y = (Mat_SeqAIJ *)yy->A->data; 1787 bnz = PetscBLASIntCast(x->nz); 1788 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1789 x = (Mat_SeqAIJ *)xx->B->data; 1790 y = (Mat_SeqAIJ *)yy->B->data; 1791 bnz = PetscBLASIntCast(x->nz); 1792 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1793 } else if (str == SUBSET_NONZERO_PATTERN) { 1794 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1795 1796 x = (Mat_SeqAIJ *)xx->B->data; 1797 y = (Mat_SeqAIJ *)yy->B->data; 1798 if (y->xtoy && y->XtoY != xx->B) { 1799 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1800 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1801 } 1802 if (!y->xtoy) { /* get xtoy */ 1803 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1804 y->XtoY = xx->B; 1805 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1806 } 1807 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1808 } else { 1809 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1810 } 1811 PetscFunctionReturn(0); 1812 } 1813 1814 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1815 1816 #undef __FUNCT__ 1817 #define __FUNCT__ "MatConjugate_MPIAIJ" 1818 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1819 { 1820 #if defined(PETSC_USE_COMPLEX) 1821 PetscErrorCode ierr; 1822 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1823 1824 PetscFunctionBegin; 1825 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1826 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1827 #else 1828 PetscFunctionBegin; 1829 #endif 1830 PetscFunctionReturn(0); 1831 } 1832 1833 #undef __FUNCT__ 1834 #define __FUNCT__ "MatRealPart_MPIAIJ" 1835 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1836 { 1837 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1838 PetscErrorCode ierr; 1839 1840 PetscFunctionBegin; 1841 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1842 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1843 PetscFunctionReturn(0); 1844 } 1845 1846 #undef __FUNCT__ 1847 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1848 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1849 { 1850 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1851 PetscErrorCode ierr; 1852 1853 PetscFunctionBegin; 1854 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1855 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1856 PetscFunctionReturn(0); 1857 } 1858 1859 #ifdef PETSC_HAVE_PBGL 1860 1861 #include <boost/parallel/mpi/bsp_process_group.hpp> 1862 #include <boost/graph/distributed/ilu_default_graph.hpp> 1863 #include <boost/graph/distributed/ilu_0_block.hpp> 1864 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1865 #include <boost/graph/distributed/petsc/interface.hpp> 1866 #include <boost/multi_array.hpp> 1867 #include <boost/parallel/distributed_property_map->hpp> 1868 1869 #undef __FUNCT__ 1870 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1871 /* 1872 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1873 */ 1874 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1875 { 1876 namespace petsc = boost::distributed::petsc; 1877 1878 namespace graph_dist = boost::graph::distributed; 1879 using boost::graph::distributed::ilu_default::process_group_type; 1880 using boost::graph::ilu_permuted; 1881 1882 PetscTruth row_identity, col_identity; 1883 PetscContainer c; 1884 PetscInt m, n, M, N; 1885 PetscErrorCode ierr; 1886 1887 PetscFunctionBegin; 1888 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1889 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1890 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1891 if (!row_identity || !col_identity) { 1892 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1893 } 1894 1895 process_group_type pg; 1896 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1897 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1898 lgraph_type& level_graph = *lgraph_p; 1899 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1900 1901 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1902 ilu_permuted(level_graph); 1903 1904 /* put together the new matrix */ 1905 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 1906 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1907 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1908 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 1909 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 1910 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1911 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1912 1913 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 1914 ierr = PetscContainerSetPointer(c, lgraph_p); 1915 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 1916 PetscFunctionReturn(0); 1917 } 1918 1919 #undef __FUNCT__ 1920 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1921 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 1922 { 1923 PetscFunctionBegin; 1924 PetscFunctionReturn(0); 1925 } 1926 1927 #undef __FUNCT__ 1928 #define __FUNCT__ "MatSolve_MPIAIJ" 1929 /* 1930 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1931 */ 1932 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1933 { 1934 namespace graph_dist = boost::graph::distributed; 1935 1936 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1937 lgraph_type* lgraph_p; 1938 PetscContainer c; 1939 PetscErrorCode ierr; 1940 1941 PetscFunctionBegin; 1942 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1943 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1944 ierr = VecCopy(b, x);CHKERRQ(ierr); 1945 1946 PetscScalar* array_x; 1947 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1948 PetscInt sx; 1949 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 1950 1951 PetscScalar* array_b; 1952 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 1953 PetscInt sb; 1954 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 1955 1956 lgraph_type& level_graph = *lgraph_p; 1957 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1958 1959 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 1960 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 1961 ref_x(array_x, boost::extents[num_vertices(graph)]); 1962 1963 typedef boost::iterator_property_map<array_ref_type::iterator, 1964 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 1965 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 1966 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 1967 1968 ilu_set_solve(*lgraph_p, vector_b, vector_x); 1969 1970 PetscFunctionReturn(0); 1971 } 1972 #endif 1973 1974 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 1975 PetscInt nzlocal,nsends,nrecvs; 1976 PetscMPIInt *send_rank; 1977 PetscInt *sbuf_nz,*sbuf_j,**rbuf_j; 1978 PetscScalar *sbuf_a,**rbuf_a; 1979 PetscErrorCode (*MatDestroy)(Mat); 1980 } Mat_Redundant; 1981 1982 #undef __FUNCT__ 1983 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 1984 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 1985 { 1986 PetscErrorCode ierr; 1987 Mat_Redundant *redund=(Mat_Redundant*)ptr; 1988 PetscInt i; 1989 1990 PetscFunctionBegin; 1991 ierr = PetscFree(redund->send_rank);CHKERRQ(ierr); 1992 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 1993 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 1994 for (i=0; i<redund->nrecvs; i++){ 1995 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 1996 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 1997 } 1998 ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 1999 ierr = PetscFree(redund);CHKERRQ(ierr); 2000 PetscFunctionReturn(0); 2001 } 2002 2003 #undef __FUNCT__ 2004 #define __FUNCT__ "MatDestroy_MatRedundant" 2005 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2006 { 2007 PetscErrorCode ierr; 2008 PetscContainer container; 2009 Mat_Redundant *redund=PETSC_NULL; 2010 2011 PetscFunctionBegin; 2012 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2013 if (container) { 2014 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2015 } else { 2016 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2017 } 2018 A->ops->destroy = redund->MatDestroy; 2019 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2020 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2021 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 2022 PetscFunctionReturn(0); 2023 } 2024 2025 #undef __FUNCT__ 2026 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2027 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2028 { 2029 PetscMPIInt rank,size; 2030 MPI_Comm comm=((PetscObject)mat)->comm; 2031 PetscErrorCode ierr; 2032 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2033 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2034 PetscInt *rowrange=mat->rmap->range; 2035 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2036 Mat A=aij->A,B=aij->B,C=*matredundant; 2037 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2038 PetscScalar *sbuf_a; 2039 PetscInt nzlocal=a->nz+b->nz; 2040 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2041 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2042 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2043 MatScalar *aworkA,*aworkB; 2044 PetscScalar *vals; 2045 PetscMPIInt tag1,tag2,tag3,imdex; 2046 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2047 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2048 MPI_Status recv_status,*send_status; 2049 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2050 PetscInt **rbuf_j=PETSC_NULL; 2051 PetscScalar **rbuf_a=PETSC_NULL; 2052 Mat_Redundant *redund=PETSC_NULL; 2053 PetscContainer container; 2054 2055 PetscFunctionBegin; 2056 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2057 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2058 2059 if (reuse == MAT_REUSE_MATRIX) { 2060 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2061 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2062 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2063 if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2064 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2065 if (container) { 2066 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2067 } else { 2068 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2069 } 2070 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2071 2072 nsends = redund->nsends; 2073 nrecvs = redund->nrecvs; 2074 send_rank = redund->send_rank; recv_rank = send_rank + size; 2075 sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends; 2076 sbuf_j = redund->sbuf_j; 2077 sbuf_a = redund->sbuf_a; 2078 rbuf_j = redund->rbuf_j; 2079 rbuf_a = redund->rbuf_a; 2080 } 2081 2082 if (reuse == MAT_INITIAL_MATRIX){ 2083 PetscMPIInt subrank,subsize; 2084 PetscInt nleftover,np_subcomm; 2085 /* get the destination processors' id send_rank, nsends and nrecvs */ 2086 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2087 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2088 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 2089 recv_rank = send_rank + size; 2090 np_subcomm = size/nsubcomm; 2091 nleftover = size - nsubcomm*np_subcomm; 2092 nsends = 0; nrecvs = 0; 2093 for (i=0; i<size; i++){ /* i=rank*/ 2094 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2095 send_rank[nsends] = i; nsends++; 2096 recv_rank[nrecvs++] = i; 2097 } 2098 } 2099 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2100 i = size-nleftover-1; 2101 j = 0; 2102 while (j < nsubcomm - nleftover){ 2103 send_rank[nsends++] = i; 2104 i--; j++; 2105 } 2106 } 2107 2108 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2109 for (i=0; i<nleftover; i++){ 2110 recv_rank[nrecvs++] = size-nleftover+i; 2111 } 2112 } 2113 2114 /* allocate sbuf_j, sbuf_a */ 2115 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2116 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2117 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2118 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2119 2120 /* copy mat's local entries into the buffers */ 2121 if (reuse == MAT_INITIAL_MATRIX){ 2122 rownz_max = 0; 2123 rptr = sbuf_j; 2124 cols = sbuf_j + rend-rstart + 1; 2125 vals = sbuf_a; 2126 rptr[0] = 0; 2127 for (i=0; i<rend-rstart; i++){ 2128 row = i + rstart; 2129 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2130 ncols = nzA + nzB; 2131 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2132 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2133 /* load the column indices for this row into cols */ 2134 lwrite = 0; 2135 for (l=0; l<nzB; l++) { 2136 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2137 vals[lwrite] = aworkB[l]; 2138 cols[lwrite++] = ctmp; 2139 } 2140 } 2141 for (l=0; l<nzA; l++){ 2142 vals[lwrite] = aworkA[l]; 2143 cols[lwrite++] = cstart + cworkA[l]; 2144 } 2145 for (l=0; l<nzB; l++) { 2146 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2147 vals[lwrite] = aworkB[l]; 2148 cols[lwrite++] = ctmp; 2149 } 2150 } 2151 vals += ncols; 2152 cols += ncols; 2153 rptr[i+1] = rptr[i] + ncols; 2154 if (rownz_max < ncols) rownz_max = ncols; 2155 } 2156 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2157 } else { /* only copy matrix values into sbuf_a */ 2158 rptr = sbuf_j; 2159 vals = sbuf_a; 2160 rptr[0] = 0; 2161 for (i=0; i<rend-rstart; i++){ 2162 row = i + rstart; 2163 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2164 ncols = nzA + nzB; 2165 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2166 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2167 lwrite = 0; 2168 for (l=0; l<nzB; l++) { 2169 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2170 } 2171 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2172 for (l=0; l<nzB; l++) { 2173 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2174 } 2175 vals += ncols; 2176 rptr[i+1] = rptr[i] + ncols; 2177 } 2178 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2179 2180 /* send nzlocal to others, and recv other's nzlocal */ 2181 /*--------------------------------------------------*/ 2182 if (reuse == MAT_INITIAL_MATRIX){ 2183 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2184 s_waits2 = s_waits3 + nsends; 2185 s_waits1 = s_waits2 + nsends; 2186 r_waits1 = s_waits1 + nsends; 2187 r_waits2 = r_waits1 + nrecvs; 2188 r_waits3 = r_waits2 + nrecvs; 2189 } else { 2190 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2191 r_waits3 = s_waits3 + nsends; 2192 } 2193 2194 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2195 if (reuse == MAT_INITIAL_MATRIX){ 2196 /* get new tags to keep the communication clean */ 2197 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2198 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2199 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2200 rbuf_nz = sbuf_nz + nsends; 2201 2202 /* post receives of other's nzlocal */ 2203 for (i=0; i<nrecvs; i++){ 2204 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2205 } 2206 /* send nzlocal to others */ 2207 for (i=0; i<nsends; i++){ 2208 sbuf_nz[i] = nzlocal; 2209 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2210 } 2211 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2212 count = nrecvs; 2213 while (count) { 2214 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2215 recv_rank[imdex] = recv_status.MPI_SOURCE; 2216 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2217 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2218 2219 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2220 rbuf_nz[imdex] += i + 2; 2221 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2222 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2223 count--; 2224 } 2225 /* wait on sends of nzlocal */ 2226 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2227 /* send mat->i,j to others, and recv from other's */ 2228 /*------------------------------------------------*/ 2229 for (i=0; i<nsends; i++){ 2230 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2231 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2232 } 2233 /* wait on receives of mat->i,j */ 2234 /*------------------------------*/ 2235 count = nrecvs; 2236 while (count) { 2237 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2238 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2239 count--; 2240 } 2241 /* wait on sends of mat->i,j */ 2242 /*---------------------------*/ 2243 if (nsends) { 2244 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2245 } 2246 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2247 2248 /* post receives, send and receive mat->a */ 2249 /*----------------------------------------*/ 2250 for (imdex=0; imdex<nrecvs; imdex++) { 2251 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2252 } 2253 for (i=0; i<nsends; i++){ 2254 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2255 } 2256 count = nrecvs; 2257 while (count) { 2258 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2259 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2260 count--; 2261 } 2262 if (nsends) { 2263 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2264 } 2265 2266 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2267 2268 /* create redundant matrix */ 2269 /*-------------------------*/ 2270 if (reuse == MAT_INITIAL_MATRIX){ 2271 /* compute rownz_max for preallocation */ 2272 for (imdex=0; imdex<nrecvs; imdex++){ 2273 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2274 rptr = rbuf_j[imdex]; 2275 for (i=0; i<j; i++){ 2276 ncols = rptr[i+1] - rptr[i]; 2277 if (rownz_max < ncols) rownz_max = ncols; 2278 } 2279 } 2280 2281 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2282 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2283 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2284 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2285 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2286 } else { 2287 C = *matredundant; 2288 } 2289 2290 /* insert local matrix entries */ 2291 rptr = sbuf_j; 2292 cols = sbuf_j + rend-rstart + 1; 2293 vals = sbuf_a; 2294 for (i=0; i<rend-rstart; i++){ 2295 row = i + rstart; 2296 ncols = rptr[i+1] - rptr[i]; 2297 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2298 vals += ncols; 2299 cols += ncols; 2300 } 2301 /* insert received matrix entries */ 2302 for (imdex=0; imdex<nrecvs; imdex++){ 2303 rstart = rowrange[recv_rank[imdex]]; 2304 rend = rowrange[recv_rank[imdex]+1]; 2305 rptr = rbuf_j[imdex]; 2306 cols = rbuf_j[imdex] + rend-rstart + 1; 2307 vals = rbuf_a[imdex]; 2308 for (i=0; i<rend-rstart; i++){ 2309 row = i + rstart; 2310 ncols = rptr[i+1] - rptr[i]; 2311 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2312 vals += ncols; 2313 cols += ncols; 2314 } 2315 } 2316 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2317 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2318 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2319 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2320 if (reuse == MAT_INITIAL_MATRIX){ 2321 PetscContainer container; 2322 *matredundant = C; 2323 /* create a supporting struct and attach it to C for reuse */ 2324 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2325 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2326 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2327 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2328 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2329 2330 redund->nzlocal = nzlocal; 2331 redund->nsends = nsends; 2332 redund->nrecvs = nrecvs; 2333 redund->send_rank = send_rank; 2334 redund->sbuf_nz = sbuf_nz; 2335 redund->sbuf_j = sbuf_j; 2336 redund->sbuf_a = sbuf_a; 2337 redund->rbuf_j = rbuf_j; 2338 redund->rbuf_a = rbuf_a; 2339 2340 redund->MatDestroy = C->ops->destroy; 2341 C->ops->destroy = MatDestroy_MatRedundant; 2342 } 2343 PetscFunctionReturn(0); 2344 } 2345 2346 #undef __FUNCT__ 2347 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2348 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2349 { 2350 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2351 PetscErrorCode ierr; 2352 PetscInt i,*idxb = 0; 2353 PetscScalar *va,*vb; 2354 Vec vtmp; 2355 2356 PetscFunctionBegin; 2357 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2358 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2359 if (idx) { 2360 for (i=0; i<A->rmap->n; i++) { 2361 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2362 } 2363 } 2364 2365 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2366 if (idx) { 2367 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2368 } 2369 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2370 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2371 2372 for (i=0; i<A->rmap->n; i++){ 2373 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2374 va[i] = vb[i]; 2375 if (idx) idx[i] = a->garray[idxb[i]]; 2376 } 2377 } 2378 2379 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2380 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2381 if (idxb) { 2382 ierr = PetscFree(idxb);CHKERRQ(ierr); 2383 } 2384 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2385 PetscFunctionReturn(0); 2386 } 2387 2388 #undef __FUNCT__ 2389 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2390 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2391 { 2392 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2393 PetscErrorCode ierr; 2394 PetscInt i,*idxb = 0; 2395 PetscScalar *va,*vb; 2396 Vec vtmp; 2397 2398 PetscFunctionBegin; 2399 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2400 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2401 if (idx) { 2402 for (i=0; i<A->cmap->n; i++) { 2403 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2404 } 2405 } 2406 2407 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2408 if (idx) { 2409 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2410 } 2411 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2412 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2413 2414 for (i=0; i<A->rmap->n; i++){ 2415 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2416 va[i] = vb[i]; 2417 if (idx) idx[i] = a->garray[idxb[i]]; 2418 } 2419 } 2420 2421 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2422 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2423 if (idxb) { 2424 ierr = PetscFree(idxb);CHKERRQ(ierr); 2425 } 2426 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2427 PetscFunctionReturn(0); 2428 } 2429 2430 #undef __FUNCT__ 2431 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2432 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2433 { 2434 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2435 PetscInt n = A->rmap->n; 2436 PetscInt cstart = A->cmap->rstart; 2437 PetscInt *cmap = mat->garray; 2438 PetscInt *diagIdx, *offdiagIdx; 2439 Vec diagV, offdiagV; 2440 PetscScalar *a, *diagA, *offdiagA; 2441 PetscInt r; 2442 PetscErrorCode ierr; 2443 2444 PetscFunctionBegin; 2445 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2446 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2447 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2448 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2449 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2450 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2451 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2452 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2453 for(r = 0; r < n; ++r) { 2454 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2455 a[r] = diagA[r]; 2456 idx[r] = cstart + diagIdx[r]; 2457 } else { 2458 a[r] = offdiagA[r]; 2459 idx[r] = cmap[offdiagIdx[r]]; 2460 } 2461 } 2462 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2463 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2464 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2465 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2466 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2467 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2468 PetscFunctionReturn(0); 2469 } 2470 2471 #undef __FUNCT__ 2472 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2473 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2474 { 2475 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2476 PetscInt n = A->rmap->n; 2477 PetscInt cstart = A->cmap->rstart; 2478 PetscInt *cmap = mat->garray; 2479 PetscInt *diagIdx, *offdiagIdx; 2480 Vec diagV, offdiagV; 2481 PetscScalar *a, *diagA, *offdiagA; 2482 PetscInt r; 2483 PetscErrorCode ierr; 2484 2485 PetscFunctionBegin; 2486 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2487 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2488 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2489 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2490 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2491 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2492 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2493 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2494 for(r = 0; r < n; ++r) { 2495 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2496 a[r] = diagA[r]; 2497 idx[r] = cstart + diagIdx[r]; 2498 } else { 2499 a[r] = offdiagA[r]; 2500 idx[r] = cmap[offdiagIdx[r]]; 2501 } 2502 } 2503 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2504 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2505 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2506 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2507 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2508 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2509 PetscFunctionReturn(0); 2510 } 2511 2512 #undef __FUNCT__ 2513 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2514 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[]) 2515 { 2516 PetscErrorCode ierr; 2517 2518 PetscFunctionBegin; 2519 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 2520 PetscFunctionReturn(0); 2521 } 2522 2523 /* -------------------------------------------------------------------*/ 2524 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2525 MatGetRow_MPIAIJ, 2526 MatRestoreRow_MPIAIJ, 2527 MatMult_MPIAIJ, 2528 /* 4*/ MatMultAdd_MPIAIJ, 2529 MatMultTranspose_MPIAIJ, 2530 MatMultTransposeAdd_MPIAIJ, 2531 #ifdef PETSC_HAVE_PBGL 2532 MatSolve_MPIAIJ, 2533 #else 2534 0, 2535 #endif 2536 0, 2537 0, 2538 /*10*/ 0, 2539 0, 2540 0, 2541 MatRelax_MPIAIJ, 2542 MatTranspose_MPIAIJ, 2543 /*15*/ MatGetInfo_MPIAIJ, 2544 MatEqual_MPIAIJ, 2545 MatGetDiagonal_MPIAIJ, 2546 MatDiagonalScale_MPIAIJ, 2547 MatNorm_MPIAIJ, 2548 /*20*/ MatAssemblyBegin_MPIAIJ, 2549 MatAssemblyEnd_MPIAIJ, 2550 MatSetOption_MPIAIJ, 2551 MatZeroEntries_MPIAIJ, 2552 /*24*/ MatZeroRows_MPIAIJ, 2553 0, 2554 #ifdef PETSC_HAVE_PBGL 2555 0, 2556 #else 2557 0, 2558 #endif 2559 0, 2560 0, 2561 /*29*/ MatSetUpPreallocation_MPIAIJ, 2562 #ifdef PETSC_HAVE_PBGL 2563 0, 2564 #else 2565 0, 2566 #endif 2567 0, 2568 0, 2569 0, 2570 /*34*/ MatDuplicate_MPIAIJ, 2571 0, 2572 0, 2573 0, 2574 0, 2575 /*39*/ MatAXPY_MPIAIJ, 2576 MatGetSubMatrices_MPIAIJ, 2577 MatIncreaseOverlap_MPIAIJ, 2578 MatGetValues_MPIAIJ, 2579 MatCopy_MPIAIJ, 2580 /*44*/ MatGetRowMax_MPIAIJ, 2581 MatScale_MPIAIJ, 2582 0, 2583 0, 2584 0, 2585 /*49*/ MatSetBlockSize_MPIAIJ, 2586 0, 2587 0, 2588 0, 2589 0, 2590 /*54*/ MatFDColoringCreate_MPIAIJ, 2591 0, 2592 MatSetUnfactored_MPIAIJ, 2593 MatPermute_MPIAIJ, 2594 0, 2595 /*59*/ MatGetSubMatrix_MPIAIJ, 2596 MatDestroy_MPIAIJ, 2597 MatView_MPIAIJ, 2598 0, 2599 0, 2600 /*64*/ 0, 2601 0, 2602 0, 2603 0, 2604 0, 2605 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2606 MatGetRowMinAbs_MPIAIJ, 2607 0, 2608 MatSetColoring_MPIAIJ, 2609 #if defined(PETSC_HAVE_ADIC) 2610 MatSetValuesAdic_MPIAIJ, 2611 #else 2612 0, 2613 #endif 2614 MatSetValuesAdifor_MPIAIJ, 2615 /*75*/ 0, 2616 0, 2617 0, 2618 0, 2619 0, 2620 /*80*/ 0, 2621 0, 2622 0, 2623 /*83*/ MatLoad_MPIAIJ, 2624 0, 2625 0, 2626 0, 2627 0, 2628 0, 2629 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2630 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2631 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2632 MatPtAP_Basic, 2633 MatPtAPSymbolic_MPIAIJ, 2634 /*94*/ MatPtAPNumeric_MPIAIJ, 2635 0, 2636 0, 2637 0, 2638 0, 2639 /*99*/ 0, 2640 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2641 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2642 MatConjugate_MPIAIJ, 2643 0, 2644 /*104*/MatSetValuesRow_MPIAIJ, 2645 MatRealPart_MPIAIJ, 2646 MatImaginaryPart_MPIAIJ, 2647 0, 2648 0, 2649 /*109*/0, 2650 MatGetRedundantMatrix_MPIAIJ, 2651 MatGetRowMin_MPIAIJ, 2652 0, 2653 0, 2654 /*114*/MatGetSeqNonzerostructure_MPIAIJ}; 2655 2656 /* ----------------------------------------------------------------------------------------*/ 2657 2658 EXTERN_C_BEGIN 2659 #undef __FUNCT__ 2660 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2661 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2662 { 2663 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2664 PetscErrorCode ierr; 2665 2666 PetscFunctionBegin; 2667 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2668 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2669 PetscFunctionReturn(0); 2670 } 2671 EXTERN_C_END 2672 2673 EXTERN_C_BEGIN 2674 #undef __FUNCT__ 2675 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2676 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2677 { 2678 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2679 PetscErrorCode ierr; 2680 2681 PetscFunctionBegin; 2682 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2683 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2684 PetscFunctionReturn(0); 2685 } 2686 EXTERN_C_END 2687 2688 #include "petscpc.h" 2689 EXTERN_C_BEGIN 2690 #undef __FUNCT__ 2691 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2692 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2693 { 2694 Mat_MPIAIJ *b; 2695 PetscErrorCode ierr; 2696 PetscInt i; 2697 2698 PetscFunctionBegin; 2699 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2700 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2701 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2702 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2703 2704 ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr); 2705 ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr); 2706 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 2707 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 2708 if (d_nnz) { 2709 for (i=0; i<B->rmap->n; i++) { 2710 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2711 } 2712 } 2713 if (o_nnz) { 2714 for (i=0; i<B->rmap->n; i++) { 2715 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2716 } 2717 } 2718 b = (Mat_MPIAIJ*)B->data; 2719 2720 if (!B->preallocated) { 2721 /* Explicitly create 2 MATSEQAIJ matrices. */ 2722 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2723 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2724 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2725 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2726 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2727 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2728 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2729 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2730 } 2731 2732 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2733 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2734 B->preallocated = PETSC_TRUE; 2735 PetscFunctionReturn(0); 2736 } 2737 EXTERN_C_END 2738 2739 #undef __FUNCT__ 2740 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2741 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2742 { 2743 Mat mat; 2744 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2745 PetscErrorCode ierr; 2746 2747 PetscFunctionBegin; 2748 *newmat = 0; 2749 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2750 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2751 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2752 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2753 a = (Mat_MPIAIJ*)mat->data; 2754 2755 mat->factor = matin->factor; 2756 mat->rmap->bs = matin->rmap->bs; 2757 mat->assembled = PETSC_TRUE; 2758 mat->insertmode = NOT_SET_VALUES; 2759 mat->preallocated = PETSC_TRUE; 2760 2761 a->size = oldmat->size; 2762 a->rank = oldmat->rank; 2763 a->donotstash = oldmat->donotstash; 2764 a->roworiented = oldmat->roworiented; 2765 a->rowindices = 0; 2766 a->rowvalues = 0; 2767 a->getrowactive = PETSC_FALSE; 2768 2769 ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr); 2770 ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr); 2771 2772 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr); 2773 if (oldmat->colmap) { 2774 #if defined (PETSC_USE_CTABLE) 2775 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2776 #else 2777 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2778 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2779 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2780 #endif 2781 } else a->colmap = 0; 2782 if (oldmat->garray) { 2783 PetscInt len; 2784 len = oldmat->B->cmap->n; 2785 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2786 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2787 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2788 } else a->garray = 0; 2789 2790 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2791 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2792 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2793 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2794 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2795 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2796 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2797 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2798 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2799 *newmat = mat; 2800 PetscFunctionReturn(0); 2801 } 2802 2803 #include "petscsys.h" 2804 2805 #undef __FUNCT__ 2806 #define __FUNCT__ "MatLoad_MPIAIJ" 2807 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 2808 { 2809 Mat A; 2810 PetscScalar *vals,*svals; 2811 MPI_Comm comm = ((PetscObject)viewer)->comm; 2812 MPI_Status status; 2813 PetscErrorCode ierr; 2814 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz; 2815 PetscInt i,nz,j,rstart,rend,mmax,maxnz; 2816 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2817 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2818 PetscInt cend,cstart,n,*rowners; 2819 int fd; 2820 2821 PetscFunctionBegin; 2822 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2823 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2824 if (!rank) { 2825 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2826 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2827 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2828 } 2829 2830 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2831 M = header[1]; N = header[2]; 2832 /* determine ownership of all rows */ 2833 m = M/size + ((M % size) > rank); 2834 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2835 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2836 2837 /* First process needs enough room for process with most rows */ 2838 if (!rank) { 2839 mmax = rowners[1]; 2840 for (i=2; i<size; i++) { 2841 mmax = PetscMax(mmax,rowners[i]); 2842 } 2843 } else mmax = m; 2844 2845 rowners[0] = 0; 2846 for (i=2; i<=size; i++) { 2847 rowners[i] += rowners[i-1]; 2848 } 2849 rstart = rowners[rank]; 2850 rend = rowners[rank+1]; 2851 2852 /* distribute row lengths to all processors */ 2853 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2854 if (!rank) { 2855 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2856 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2857 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2858 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2859 for (j=0; j<m; j++) { 2860 procsnz[0] += ourlens[j]; 2861 } 2862 for (i=1; i<size; i++) { 2863 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2864 /* calculate the number of nonzeros on each processor */ 2865 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2866 procsnz[i] += rowlengths[j]; 2867 } 2868 mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]); 2869 ierr = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2870 } 2871 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2872 } else { 2873 mpicnt = PetscMPIIntCast(m);CHKERRQ(ierr); 2874 ierr = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2875 } 2876 2877 if (!rank) { 2878 /* determine max buffer needed and allocate it */ 2879 maxnz = 0; 2880 for (i=0; i<size; i++) { 2881 maxnz = PetscMax(maxnz,procsnz[i]); 2882 } 2883 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2884 2885 /* read in my part of the matrix column indices */ 2886 nz = procsnz[0]; 2887 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2888 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2889 2890 /* read in every one elses and ship off */ 2891 for (i=1; i<size; i++) { 2892 nz = procsnz[i]; 2893 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2894 mpicnt = PetscMPIIntCast(nz); 2895 ierr = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2896 } 2897 ierr = PetscFree(cols);CHKERRQ(ierr); 2898 } else { 2899 /* determine buffer space needed for message */ 2900 nz = 0; 2901 for (i=0; i<m; i++) { 2902 nz += ourlens[i]; 2903 } 2904 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2905 2906 /* receive message of column indices*/ 2907 mpicnt = PetscMPIIntCast(nz);CHKERRQ(ierr); 2908 ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2909 ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr); 2910 if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 2911 else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);} 2912 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 2913 } 2914 2915 /* determine column ownership if matrix is not square */ 2916 if (N != M) { 2917 n = N/size + ((N % size) > rank); 2918 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2919 cstart = cend - n; 2920 } else { 2921 cstart = rstart; 2922 cend = rend; 2923 n = cend - cstart; 2924 } 2925 2926 /* loop over local rows, determining number of off diagonal entries */ 2927 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2928 jj = 0; 2929 for (i=0; i<m; i++) { 2930 for (j=0; j<ourlens[i]; j++) { 2931 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2932 jj++; 2933 } 2934 } 2935 2936 /* create our matrix */ 2937 for (i=0; i<m; i++) { 2938 ourlens[i] -= offlens[i]; 2939 } 2940 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2941 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2942 ierr = MatSetType(A,type);CHKERRQ(ierr); 2943 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2944 2945 for (i=0; i<m; i++) { 2946 ourlens[i] += offlens[i]; 2947 } 2948 2949 if (!rank) { 2950 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2951 2952 /* read in my part of the matrix numerical values */ 2953 nz = procsnz[0]; 2954 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2955 2956 /* insert into matrix */ 2957 jj = rstart; 2958 smycols = mycols; 2959 svals = vals; 2960 for (i=0; i<m; i++) { 2961 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2962 smycols += ourlens[i]; 2963 svals += ourlens[i]; 2964 jj++; 2965 } 2966 2967 /* read in other processors and ship out */ 2968 for (i=1; i<size; i++) { 2969 nz = procsnz[i]; 2970 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2971 mpicnt = PetscMPIIntCast(nz); 2972 ierr = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 2973 } 2974 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2975 } else { 2976 /* receive numeric values */ 2977 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2978 2979 /* receive message of values*/ 2980 mpicnt = PetscMPIIntCast(nz); 2981 ierr = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 2982 ierr = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr); 2983 if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 2984 else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);} 2985 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 2986 2987 /* insert into matrix */ 2988 jj = rstart; 2989 smycols = mycols; 2990 svals = vals; 2991 for (i=0; i<m; i++) { 2992 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2993 smycols += ourlens[i]; 2994 svals += ourlens[i]; 2995 jj++; 2996 } 2997 } 2998 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2999 ierr = PetscFree(vals);CHKERRQ(ierr); 3000 ierr = PetscFree(mycols);CHKERRQ(ierr); 3001 ierr = PetscFree(rowners);CHKERRQ(ierr); 3002 3003 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3004 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3005 *newmat = A; 3006 PetscFunctionReturn(0); 3007 } 3008 3009 #undef __FUNCT__ 3010 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3011 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3012 { 3013 PetscErrorCode ierr; 3014 IS iscol_local; 3015 PetscInt csize; 3016 3017 PetscFunctionBegin; 3018 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3019 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3020 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3021 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 3022 PetscFunctionReturn(0); 3023 } 3024 3025 #undef __FUNCT__ 3026 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3027 /* 3028 Not great since it makes two copies of the submatrix, first an SeqAIJ 3029 in local and then by concatenating the local matrices the end result. 3030 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3031 3032 Note: This requires a sequential iscol with all indices. 3033 */ 3034 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3035 { 3036 PetscErrorCode ierr; 3037 PetscMPIInt rank,size; 3038 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3039 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3040 Mat *local,M,Mreuse; 3041 MatScalar *vwork,*aa; 3042 MPI_Comm comm = ((PetscObject)mat)->comm; 3043 Mat_SeqAIJ *aij; 3044 3045 3046 PetscFunctionBegin; 3047 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3048 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3049 3050 if (call == MAT_REUSE_MATRIX) { 3051 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3052 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3053 local = &Mreuse; 3054 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3055 } else { 3056 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3057 Mreuse = *local; 3058 ierr = PetscFree(local);CHKERRQ(ierr); 3059 } 3060 3061 /* 3062 m - number of local rows 3063 n - number of columns (same on all processors) 3064 rstart - first row in new global matrix generated 3065 */ 3066 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3067 if (call == MAT_INITIAL_MATRIX) { 3068 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3069 ii = aij->i; 3070 jj = aij->j; 3071 3072 /* 3073 Determine the number of non-zeros in the diagonal and off-diagonal 3074 portions of the matrix in order to do correct preallocation 3075 */ 3076 3077 /* first get start and end of "diagonal" columns */ 3078 if (csize == PETSC_DECIDE) { 3079 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3080 if (mglobal == n) { /* square matrix */ 3081 nlocal = m; 3082 } else { 3083 nlocal = n/size + ((n % size) > rank); 3084 } 3085 } else { 3086 nlocal = csize; 3087 } 3088 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3089 rstart = rend - nlocal; 3090 if (rank == size - 1 && rend != n) { 3091 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3092 } 3093 3094 /* next, compute all the lengths */ 3095 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3096 olens = dlens + m; 3097 for (i=0; i<m; i++) { 3098 jend = ii[i+1] - ii[i]; 3099 olen = 0; 3100 dlen = 0; 3101 for (j=0; j<jend; j++) { 3102 if (*jj < rstart || *jj >= rend) olen++; 3103 else dlen++; 3104 jj++; 3105 } 3106 olens[i] = olen; 3107 dlens[i] = dlen; 3108 } 3109 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3110 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3111 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3112 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3113 ierr = PetscFree(dlens);CHKERRQ(ierr); 3114 } else { 3115 PetscInt ml,nl; 3116 3117 M = *newmat; 3118 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3119 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3120 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3121 /* 3122 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3123 rather than the slower MatSetValues(). 3124 */ 3125 M->was_assembled = PETSC_TRUE; 3126 M->assembled = PETSC_FALSE; 3127 } 3128 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3129 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3130 ii = aij->i; 3131 jj = aij->j; 3132 aa = aij->a; 3133 for (i=0; i<m; i++) { 3134 row = rstart + i; 3135 nz = ii[i+1] - ii[i]; 3136 cwork = jj; jj += nz; 3137 vwork = aa; aa += nz; 3138 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3139 } 3140 3141 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3142 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3143 *newmat = M; 3144 3145 /* save submatrix used in processor for next request */ 3146 if (call == MAT_INITIAL_MATRIX) { 3147 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3148 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 3149 } 3150 3151 PetscFunctionReturn(0); 3152 } 3153 3154 EXTERN_C_BEGIN 3155 #undef __FUNCT__ 3156 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3157 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3158 { 3159 PetscInt m,cstart, cend,j,nnz,i,d; 3160 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3161 const PetscInt *JJ; 3162 PetscScalar *values; 3163 PetscErrorCode ierr; 3164 3165 PetscFunctionBegin; 3166 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3167 3168 ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3169 ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3170 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 3171 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 3172 m = B->rmap->n; 3173 cstart = B->cmap->rstart; 3174 cend = B->cmap->rend; 3175 rstart = B->rmap->rstart; 3176 3177 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 3178 o_nnz = d_nnz + m; 3179 3180 #if defined(PETSC_USE_DEBUGGING) 3181 for (i=0; i<m; i++) { 3182 nnz = Ii[i+1]- Ii[i]; 3183 JJ = J + Ii[i]; 3184 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3185 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3186 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3187 for (j=1; j<nnz; j++) { 3188 if (JJ[i] <= JJ[i-1]) SETERRRQ(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at %D location in column indices",i,j); 3189 } 3190 } 3191 #endif 3192 3193 for (i=0; i<m; i++) { 3194 nnz = Ii[i+1]- Ii[i]; 3195 JJ = J + Ii[i]; 3196 nnz_max = PetscMax(nnz_max,nnz); 3197 for (j=0; j<nnz; j++) { 3198 if (*JJ >= cstart) break; 3199 JJ++; 3200 } 3201 d = 0; 3202 for (; j<nnz; j++) { 3203 if (*JJ++ >= cend) break; 3204 d++; 3205 } 3206 d_nnz[i] = d; 3207 o_nnz[i] = nnz - d; 3208 } 3209 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3210 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 3211 3212 if (v) values = (PetscScalar*)v; 3213 else { 3214 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3215 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3216 } 3217 3218 for (i=0; i<m; i++) { 3219 ii = i + rstart; 3220 nnz = Ii[i+1]- Ii[i]; 3221 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3222 } 3223 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3224 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3225 3226 if (!v) { 3227 ierr = PetscFree(values);CHKERRQ(ierr); 3228 } 3229 PetscFunctionReturn(0); 3230 } 3231 EXTERN_C_END 3232 3233 #undef __FUNCT__ 3234 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3235 /*@ 3236 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3237 (the default parallel PETSc format). 3238 3239 Collective on MPI_Comm 3240 3241 Input Parameters: 3242 + B - the matrix 3243 . i - the indices into j for the start of each local row (starts with zero) 3244 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3245 - v - optional values in the matrix 3246 3247 Level: developer 3248 3249 Notes: 3250 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3251 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3252 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3253 3254 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3255 3256 The format which is used for the sparse matrix input, is equivalent to a 3257 row-major ordering.. i.e for the following matrix, the input data expected is 3258 as shown: 3259 3260 1 0 0 3261 2 0 3 P0 3262 ------- 3263 4 5 6 P1 3264 3265 Process0 [P0]: rows_owned=[0,1] 3266 i = {0,1,3} [size = nrow+1 = 2+1] 3267 j = {0,0,2} [size = nz = 6] 3268 v = {1,2,3} [size = nz = 6] 3269 3270 Process1 [P1]: rows_owned=[2] 3271 i = {0,3} [size = nrow+1 = 1+1] 3272 j = {0,1,2} [size = nz = 6] 3273 v = {4,5,6} [size = nz = 6] 3274 3275 The column indices for each row MUST be sorted. 3276 3277 .keywords: matrix, aij, compressed row, sparse, parallel 3278 3279 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3280 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3281 @*/ 3282 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3283 { 3284 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 3285 3286 PetscFunctionBegin; 3287 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3288 if (f) { 3289 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 3290 } 3291 PetscFunctionReturn(0); 3292 } 3293 3294 #undef __FUNCT__ 3295 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3296 /*@C 3297 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3298 (the default parallel PETSc format). For good matrix assembly performance 3299 the user should preallocate the matrix storage by setting the parameters 3300 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3301 performance can be increased by more than a factor of 50. 3302 3303 Collective on MPI_Comm 3304 3305 Input Parameters: 3306 + A - the matrix 3307 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3308 (same value is used for all local rows) 3309 . d_nnz - array containing the number of nonzeros in the various rows of the 3310 DIAGONAL portion of the local submatrix (possibly different for each row) 3311 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3312 The size of this array is equal to the number of local rows, i.e 'm'. 3313 You must leave room for the diagonal entry even if it is zero. 3314 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3315 submatrix (same value is used for all local rows). 3316 - o_nnz - array containing the number of nonzeros in the various rows of the 3317 OFF-DIAGONAL portion of the local submatrix (possibly different for 3318 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3319 structure. The size of this array is equal to the number 3320 of local rows, i.e 'm'. 3321 3322 If the *_nnz parameter is given then the *_nz parameter is ignored 3323 3324 The AIJ format (also called the Yale sparse matrix format or 3325 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3326 storage. The stored row and column indices begin with zero. See the users manual for details. 3327 3328 The parallel matrix is partitioned such that the first m0 rows belong to 3329 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3330 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3331 3332 The DIAGONAL portion of the local submatrix of a processor can be defined 3333 as the submatrix which is obtained by extraction the part corresponding 3334 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 3335 first row that belongs to the processor, and r2 is the last row belonging 3336 to the this processor. This is a square mxm matrix. The remaining portion 3337 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 3338 3339 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3340 3341 You can call MatGetInfo() to get information on how effective the preallocation was; 3342 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3343 You can also run with the option -info and look for messages with the string 3344 malloc in them to see if additional memory allocation was needed. 3345 3346 Example usage: 3347 3348 Consider the following 8x8 matrix with 34 non-zero values, that is 3349 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3350 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3351 as follows: 3352 3353 .vb 3354 1 2 0 | 0 3 0 | 0 4 3355 Proc0 0 5 6 | 7 0 0 | 8 0 3356 9 0 10 | 11 0 0 | 12 0 3357 ------------------------------------- 3358 13 0 14 | 15 16 17 | 0 0 3359 Proc1 0 18 0 | 19 20 21 | 0 0 3360 0 0 0 | 22 23 0 | 24 0 3361 ------------------------------------- 3362 Proc2 25 26 27 | 0 0 28 | 29 0 3363 30 0 0 | 31 32 33 | 0 34 3364 .ve 3365 3366 This can be represented as a collection of submatrices as: 3367 3368 .vb 3369 A B C 3370 D E F 3371 G H I 3372 .ve 3373 3374 Where the submatrices A,B,C are owned by proc0, D,E,F are 3375 owned by proc1, G,H,I are owned by proc2. 3376 3377 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3378 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3379 The 'M','N' parameters are 8,8, and have the same values on all procs. 3380 3381 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3382 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3383 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3384 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3385 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3386 matrix, ans [DF] as another SeqAIJ matrix. 3387 3388 When d_nz, o_nz parameters are specified, d_nz storage elements are 3389 allocated for every row of the local diagonal submatrix, and o_nz 3390 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3391 One way to choose d_nz and o_nz is to use the max nonzerors per local 3392 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3393 In this case, the values of d_nz,o_nz are: 3394 .vb 3395 proc0 : dnz = 2, o_nz = 2 3396 proc1 : dnz = 3, o_nz = 2 3397 proc2 : dnz = 1, o_nz = 4 3398 .ve 3399 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3400 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3401 for proc3. i.e we are using 12+15+10=37 storage locations to store 3402 34 values. 3403 3404 When d_nnz, o_nnz parameters are specified, the storage is specified 3405 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3406 In the above case the values for d_nnz,o_nnz are: 3407 .vb 3408 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3409 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3410 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3411 .ve 3412 Here the space allocated is sum of all the above values i.e 34, and 3413 hence pre-allocation is perfect. 3414 3415 Level: intermediate 3416 3417 .keywords: matrix, aij, compressed row, sparse, parallel 3418 3419 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3420 MPIAIJ, MatGetInfo() 3421 @*/ 3422 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3423 { 3424 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3425 3426 PetscFunctionBegin; 3427 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3428 if (f) { 3429 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3430 } 3431 PetscFunctionReturn(0); 3432 } 3433 3434 #undef __FUNCT__ 3435 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3436 /*@ 3437 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3438 CSR format the local rows. 3439 3440 Collective on MPI_Comm 3441 3442 Input Parameters: 3443 + comm - MPI communicator 3444 . m - number of local rows (Cannot be PETSC_DECIDE) 3445 . n - This value should be the same as the local size used in creating the 3446 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3447 calculated if N is given) For square matrices n is almost always m. 3448 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3449 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3450 . i - row indices 3451 . j - column indices 3452 - a - matrix values 3453 3454 Output Parameter: 3455 . mat - the matrix 3456 3457 Level: intermediate 3458 3459 Notes: 3460 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3461 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3462 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3463 3464 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3465 3466 The format which is used for the sparse matrix input, is equivalent to a 3467 row-major ordering.. i.e for the following matrix, the input data expected is 3468 as shown: 3469 3470 1 0 0 3471 2 0 3 P0 3472 ------- 3473 4 5 6 P1 3474 3475 Process0 [P0]: rows_owned=[0,1] 3476 i = {0,1,3} [size = nrow+1 = 2+1] 3477 j = {0,0,2} [size = nz = 6] 3478 v = {1,2,3} [size = nz = 6] 3479 3480 Process1 [P1]: rows_owned=[2] 3481 i = {0,3} [size = nrow+1 = 1+1] 3482 j = {0,1,2} [size = nz = 6] 3483 v = {4,5,6} [size = nz = 6] 3484 3485 .keywords: matrix, aij, compressed row, sparse, parallel 3486 3487 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3488 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3489 @*/ 3490 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3491 { 3492 PetscErrorCode ierr; 3493 3494 PetscFunctionBegin; 3495 if (i[0]) { 3496 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3497 } 3498 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3499 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3500 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3501 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3502 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3503 PetscFunctionReturn(0); 3504 } 3505 3506 #undef __FUNCT__ 3507 #define __FUNCT__ "MatCreateMPIAIJ" 3508 /*@C 3509 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3510 (the default parallel PETSc format). For good matrix assembly performance 3511 the user should preallocate the matrix storage by setting the parameters 3512 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3513 performance can be increased by more than a factor of 50. 3514 3515 Collective on MPI_Comm 3516 3517 Input Parameters: 3518 + comm - MPI communicator 3519 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3520 This value should be the same as the local size used in creating the 3521 y vector for the matrix-vector product y = Ax. 3522 . n - This value should be the same as the local size used in creating the 3523 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3524 calculated if N is given) For square matrices n is almost always m. 3525 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3526 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3527 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3528 (same value is used for all local rows) 3529 . d_nnz - array containing the number of nonzeros in the various rows of the 3530 DIAGONAL portion of the local submatrix (possibly different for each row) 3531 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3532 The size of this array is equal to the number of local rows, i.e 'm'. 3533 You must leave room for the diagonal entry even if it is zero. 3534 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3535 submatrix (same value is used for all local rows). 3536 - o_nnz - array containing the number of nonzeros in the various rows of the 3537 OFF-DIAGONAL portion of the local submatrix (possibly different for 3538 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3539 structure. The size of this array is equal to the number 3540 of local rows, i.e 'm'. 3541 3542 Output Parameter: 3543 . A - the matrix 3544 3545 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3546 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3547 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3548 3549 Notes: 3550 If the *_nnz parameter is given then the *_nz parameter is ignored 3551 3552 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3553 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3554 storage requirements for this matrix. 3555 3556 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3557 processor than it must be used on all processors that share the object for 3558 that argument. 3559 3560 The user MUST specify either the local or global matrix dimensions 3561 (possibly both). 3562 3563 The parallel matrix is partitioned across processors such that the 3564 first m0 rows belong to process 0, the next m1 rows belong to 3565 process 1, the next m2 rows belong to process 2 etc.. where 3566 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3567 values corresponding to [m x N] submatrix. 3568 3569 The columns are logically partitioned with the n0 columns belonging 3570 to 0th partition, the next n1 columns belonging to the next 3571 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 3572 3573 The DIAGONAL portion of the local submatrix on any given processor 3574 is the submatrix corresponding to the rows and columns m,n 3575 corresponding to the given processor. i.e diagonal matrix on 3576 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3577 etc. The remaining portion of the local submatrix [m x (N-n)] 3578 constitute the OFF-DIAGONAL portion. The example below better 3579 illustrates this concept. 3580 3581 For a square global matrix we define each processor's diagonal portion 3582 to be its local rows and the corresponding columns (a square submatrix); 3583 each processor's off-diagonal portion encompasses the remainder of the 3584 local matrix (a rectangular submatrix). 3585 3586 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3587 3588 When calling this routine with a single process communicator, a matrix of 3589 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3590 type of communicator, use the construction mechanism: 3591 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3592 3593 By default, this format uses inodes (identical nodes) when possible. 3594 We search for consecutive rows with the same nonzero structure, thereby 3595 reusing matrix information to achieve increased efficiency. 3596 3597 Options Database Keys: 3598 + -mat_no_inode - Do not use inodes 3599 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3600 - -mat_aij_oneindex - Internally use indexing starting at 1 3601 rather than 0. Note that when calling MatSetValues(), 3602 the user still MUST index entries starting at 0! 3603 3604 3605 Example usage: 3606 3607 Consider the following 8x8 matrix with 34 non-zero values, that is 3608 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3609 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3610 as follows: 3611 3612 .vb 3613 1 2 0 | 0 3 0 | 0 4 3614 Proc0 0 5 6 | 7 0 0 | 8 0 3615 9 0 10 | 11 0 0 | 12 0 3616 ------------------------------------- 3617 13 0 14 | 15 16 17 | 0 0 3618 Proc1 0 18 0 | 19 20 21 | 0 0 3619 0 0 0 | 22 23 0 | 24 0 3620 ------------------------------------- 3621 Proc2 25 26 27 | 0 0 28 | 29 0 3622 30 0 0 | 31 32 33 | 0 34 3623 .ve 3624 3625 This can be represented as a collection of submatrices as: 3626 3627 .vb 3628 A B C 3629 D E F 3630 G H I 3631 .ve 3632 3633 Where the submatrices A,B,C are owned by proc0, D,E,F are 3634 owned by proc1, G,H,I are owned by proc2. 3635 3636 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3637 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3638 The 'M','N' parameters are 8,8, and have the same values on all procs. 3639 3640 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3641 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3642 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3643 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3644 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3645 matrix, ans [DF] as another SeqAIJ matrix. 3646 3647 When d_nz, o_nz parameters are specified, d_nz storage elements are 3648 allocated for every row of the local diagonal submatrix, and o_nz 3649 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3650 One way to choose d_nz and o_nz is to use the max nonzerors per local 3651 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3652 In this case, the values of d_nz,o_nz are: 3653 .vb 3654 proc0 : dnz = 2, o_nz = 2 3655 proc1 : dnz = 3, o_nz = 2 3656 proc2 : dnz = 1, o_nz = 4 3657 .ve 3658 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3659 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3660 for proc3. i.e we are using 12+15+10=37 storage locations to store 3661 34 values. 3662 3663 When d_nnz, o_nnz parameters are specified, the storage is specified 3664 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3665 In the above case the values for d_nnz,o_nnz are: 3666 .vb 3667 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3668 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3669 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3670 .ve 3671 Here the space allocated is sum of all the above values i.e 34, and 3672 hence pre-allocation is perfect. 3673 3674 Level: intermediate 3675 3676 .keywords: matrix, aij, compressed row, sparse, parallel 3677 3678 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3679 MPIAIJ, MatCreateMPIAIJWithArrays() 3680 @*/ 3681 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3682 { 3683 PetscErrorCode ierr; 3684 PetscMPIInt size; 3685 3686 PetscFunctionBegin; 3687 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3688 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3689 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3690 if (size > 1) { 3691 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3692 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3693 } else { 3694 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3695 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3696 } 3697 PetscFunctionReturn(0); 3698 } 3699 3700 #undef __FUNCT__ 3701 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3702 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3703 { 3704 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3705 3706 PetscFunctionBegin; 3707 *Ad = a->A; 3708 *Ao = a->B; 3709 *colmap = a->garray; 3710 PetscFunctionReturn(0); 3711 } 3712 3713 #undef __FUNCT__ 3714 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3715 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3716 { 3717 PetscErrorCode ierr; 3718 PetscInt i; 3719 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3720 3721 PetscFunctionBegin; 3722 if (coloring->ctype == IS_COLORING_GLOBAL) { 3723 ISColoringValue *allcolors,*colors; 3724 ISColoring ocoloring; 3725 3726 /* set coloring for diagonal portion */ 3727 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3728 3729 /* set coloring for off-diagonal portion */ 3730 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3731 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3732 for (i=0; i<a->B->cmap->n; i++) { 3733 colors[i] = allcolors[a->garray[i]]; 3734 } 3735 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3736 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3737 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3738 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3739 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3740 ISColoringValue *colors; 3741 PetscInt *larray; 3742 ISColoring ocoloring; 3743 3744 /* set coloring for diagonal portion */ 3745 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3746 for (i=0; i<a->A->cmap->n; i++) { 3747 larray[i] = i + A->cmap->rstart; 3748 } 3749 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3750 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3751 for (i=0; i<a->A->cmap->n; i++) { 3752 colors[i] = coloring->colors[larray[i]]; 3753 } 3754 ierr = PetscFree(larray);CHKERRQ(ierr); 3755 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3756 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3757 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3758 3759 /* set coloring for off-diagonal portion */ 3760 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3761 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3762 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3763 for (i=0; i<a->B->cmap->n; i++) { 3764 colors[i] = coloring->colors[larray[i]]; 3765 } 3766 ierr = PetscFree(larray);CHKERRQ(ierr); 3767 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3768 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3769 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3770 } else { 3771 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3772 } 3773 3774 PetscFunctionReturn(0); 3775 } 3776 3777 #if defined(PETSC_HAVE_ADIC) 3778 #undef __FUNCT__ 3779 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3780 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3781 { 3782 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3783 PetscErrorCode ierr; 3784 3785 PetscFunctionBegin; 3786 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3787 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3788 PetscFunctionReturn(0); 3789 } 3790 #endif 3791 3792 #undef __FUNCT__ 3793 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3794 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3795 { 3796 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3797 PetscErrorCode ierr; 3798 3799 PetscFunctionBegin; 3800 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3801 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3802 PetscFunctionReturn(0); 3803 } 3804 3805 #undef __FUNCT__ 3806 #define __FUNCT__ "MatMerge" 3807 /*@ 3808 MatMerge - Creates a single large PETSc matrix by concatinating sequential 3809 matrices from each processor 3810 3811 Collective on MPI_Comm 3812 3813 Input Parameters: 3814 + comm - the communicators the parallel matrix will live on 3815 . inmat - the input sequential matrices 3816 . n - number of local columns (or PETSC_DECIDE) 3817 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3818 3819 Output Parameter: 3820 . outmat - the parallel matrix generated 3821 3822 Level: advanced 3823 3824 Notes: The number of columns of the matrix in EACH processor MUST be the same. 3825 3826 @*/ 3827 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3828 { 3829 PetscErrorCode ierr; 3830 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3831 PetscInt *indx; 3832 PetscScalar *values; 3833 3834 PetscFunctionBegin; 3835 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3836 if (scall == MAT_INITIAL_MATRIX){ 3837 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3838 if (n == PETSC_DECIDE){ 3839 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3840 } 3841 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3842 rstart -= m; 3843 3844 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3845 for (i=0;i<m;i++) { 3846 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3847 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3848 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3849 } 3850 /* This routine will ONLY return MPIAIJ type matrix */ 3851 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3852 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3853 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3854 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3855 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3856 3857 } else if (scall == MAT_REUSE_MATRIX){ 3858 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3859 } else { 3860 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3861 } 3862 3863 for (i=0;i<m;i++) { 3864 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3865 Ii = i + rstart; 3866 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3867 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3868 } 3869 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3870 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3871 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3872 3873 PetscFunctionReturn(0); 3874 } 3875 3876 #undef __FUNCT__ 3877 #define __FUNCT__ "MatFileSplit" 3878 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3879 { 3880 PetscErrorCode ierr; 3881 PetscMPIInt rank; 3882 PetscInt m,N,i,rstart,nnz; 3883 size_t len; 3884 const PetscInt *indx; 3885 PetscViewer out; 3886 char *name; 3887 Mat B; 3888 const PetscScalar *values; 3889 3890 PetscFunctionBegin; 3891 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3892 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3893 /* Should this be the type of the diagonal block of A? */ 3894 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3895 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3896 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3897 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3898 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3899 for (i=0;i<m;i++) { 3900 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3901 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3902 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3903 } 3904 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3905 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3906 3907 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 3908 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3909 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3910 sprintf(name,"%s.%d",outfile,rank); 3911 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3912 ierr = PetscFree(name); 3913 ierr = MatView(B,out);CHKERRQ(ierr); 3914 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3915 ierr = MatDestroy(B);CHKERRQ(ierr); 3916 PetscFunctionReturn(0); 3917 } 3918 3919 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3920 #undef __FUNCT__ 3921 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3922 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3923 { 3924 PetscErrorCode ierr; 3925 Mat_Merge_SeqsToMPI *merge; 3926 PetscContainer container; 3927 3928 PetscFunctionBegin; 3929 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3930 if (container) { 3931 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3932 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3933 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3934 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3935 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3936 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3937 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3938 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3939 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3940 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3941 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3942 ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr); 3943 3944 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 3945 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3946 } 3947 ierr = PetscFree(merge);CHKERRQ(ierr); 3948 3949 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3950 PetscFunctionReturn(0); 3951 } 3952 3953 #include "../src/mat/utils/freespace.h" 3954 #include "petscbt.h" 3955 3956 #undef __FUNCT__ 3957 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 3958 /*@C 3959 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 3960 matrices from each processor 3961 3962 Collective on MPI_Comm 3963 3964 Input Parameters: 3965 + comm - the communicators the parallel matrix will live on 3966 . seqmat - the input sequential matrices 3967 . m - number of local rows (or PETSC_DECIDE) 3968 . n - number of local columns (or PETSC_DECIDE) 3969 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3970 3971 Output Parameter: 3972 . mpimat - the parallel matrix generated 3973 3974 Level: advanced 3975 3976 Notes: 3977 The dimensions of the sequential matrix in each processor MUST be the same. 3978 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 3979 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 3980 @*/ 3981 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 3982 { 3983 PetscErrorCode ierr; 3984 MPI_Comm comm=((PetscObject)mpimat)->comm; 3985 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3986 PetscMPIInt size,rank,taga,*len_s; 3987 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 3988 PetscInt proc,m; 3989 PetscInt **buf_ri,**buf_rj; 3990 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3991 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3992 MPI_Request *s_waits,*r_waits; 3993 MPI_Status *status; 3994 MatScalar *aa=a->a; 3995 MatScalar **abuf_r,*ba_i; 3996 Mat_Merge_SeqsToMPI *merge; 3997 PetscContainer container; 3998 3999 PetscFunctionBegin; 4000 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4001 4002 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4003 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4004 4005 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4006 if (container) { 4007 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4008 } 4009 bi = merge->bi; 4010 bj = merge->bj; 4011 buf_ri = merge->buf_ri; 4012 buf_rj = merge->buf_rj; 4013 4014 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4015 owners = merge->rowmap.range; 4016 len_s = merge->len_s; 4017 4018 /* send and recv matrix values */ 4019 /*-----------------------------*/ 4020 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4021 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4022 4023 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4024 for (proc=0,k=0; proc<size; proc++){ 4025 if (!len_s[proc]) continue; 4026 i = owners[proc]; 4027 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4028 k++; 4029 } 4030 4031 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4032 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4033 ierr = PetscFree(status);CHKERRQ(ierr); 4034 4035 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4036 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4037 4038 /* insert mat values of mpimat */ 4039 /*----------------------------*/ 4040 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4041 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 4042 nextrow = buf_ri_k + merge->nrecv; 4043 nextai = nextrow + merge->nrecv; 4044 4045 for (k=0; k<merge->nrecv; k++){ 4046 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4047 nrows = *(buf_ri_k[k]); 4048 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4049 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4050 } 4051 4052 /* set values of ba */ 4053 m = merge->rowmap.n; 4054 for (i=0; i<m; i++) { 4055 arow = owners[rank] + i; 4056 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4057 bnzi = bi[i+1] - bi[i]; 4058 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4059 4060 /* add local non-zero vals of this proc's seqmat into ba */ 4061 anzi = ai[arow+1] - ai[arow]; 4062 aj = a->j + ai[arow]; 4063 aa = a->a + ai[arow]; 4064 nextaj = 0; 4065 for (j=0; nextaj<anzi; j++){ 4066 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4067 ba_i[j] += aa[nextaj++]; 4068 } 4069 } 4070 4071 /* add received vals into ba */ 4072 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4073 /* i-th row */ 4074 if (i == *nextrow[k]) { 4075 anzi = *(nextai[k]+1) - *nextai[k]; 4076 aj = buf_rj[k] + *(nextai[k]); 4077 aa = abuf_r[k] + *(nextai[k]); 4078 nextaj = 0; 4079 for (j=0; nextaj<anzi; j++){ 4080 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4081 ba_i[j] += aa[nextaj++]; 4082 } 4083 } 4084 nextrow[k]++; nextai[k]++; 4085 } 4086 } 4087 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4088 } 4089 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4090 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4091 4092 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4093 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4094 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 4095 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4096 PetscFunctionReturn(0); 4097 } 4098 4099 #undef __FUNCT__ 4100 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4101 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4102 { 4103 PetscErrorCode ierr; 4104 Mat B_mpi; 4105 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4106 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4107 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4108 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4109 PetscInt len,proc,*dnz,*onz; 4110 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4111 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4112 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4113 MPI_Status *status; 4114 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4115 PetscBT lnkbt; 4116 Mat_Merge_SeqsToMPI *merge; 4117 PetscContainer container; 4118 4119 PetscFunctionBegin; 4120 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4121 4122 /* make sure it is a PETSc comm */ 4123 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4124 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4125 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4126 4127 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4128 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4129 4130 /* determine row ownership */ 4131 /*---------------------------------------------------------*/ 4132 ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 4133 merge->rowmap.n = m; 4134 merge->rowmap.N = M; 4135 merge->rowmap.bs = 1; 4136 ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr); 4137 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4138 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4139 4140 m = merge->rowmap.n; 4141 M = merge->rowmap.N; 4142 owners = merge->rowmap.range; 4143 4144 /* determine the number of messages to send, their lengths */ 4145 /*---------------------------------------------------------*/ 4146 len_s = merge->len_s; 4147 4148 len = 0; /* length of buf_si[] */ 4149 merge->nsend = 0; 4150 for (proc=0; proc<size; proc++){ 4151 len_si[proc] = 0; 4152 if (proc == rank){ 4153 len_s[proc] = 0; 4154 } else { 4155 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4156 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4157 } 4158 if (len_s[proc]) { 4159 merge->nsend++; 4160 nrows = 0; 4161 for (i=owners[proc]; i<owners[proc+1]; i++){ 4162 if (ai[i+1] > ai[i]) nrows++; 4163 } 4164 len_si[proc] = 2*(nrows+1); 4165 len += len_si[proc]; 4166 } 4167 } 4168 4169 /* determine the number and length of messages to receive for ij-structure */ 4170 /*-------------------------------------------------------------------------*/ 4171 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4172 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4173 4174 /* post the Irecv of j-structure */ 4175 /*-------------------------------*/ 4176 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4177 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4178 4179 /* post the Isend of j-structure */ 4180 /*--------------------------------*/ 4181 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 4182 sj_waits = si_waits + merge->nsend; 4183 4184 for (proc=0, k=0; proc<size; proc++){ 4185 if (!len_s[proc]) continue; 4186 i = owners[proc]; 4187 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4188 k++; 4189 } 4190 4191 /* receives and sends of j-structure are complete */ 4192 /*------------------------------------------------*/ 4193 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4194 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4195 4196 /* send and recv i-structure */ 4197 /*---------------------------*/ 4198 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4199 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4200 4201 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4202 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4203 for (proc=0,k=0; proc<size; proc++){ 4204 if (!len_s[proc]) continue; 4205 /* form outgoing message for i-structure: 4206 buf_si[0]: nrows to be sent 4207 [1:nrows]: row index (global) 4208 [nrows+1:2*nrows+1]: i-structure index 4209 */ 4210 /*-------------------------------------------*/ 4211 nrows = len_si[proc]/2 - 1; 4212 buf_si_i = buf_si + nrows+1; 4213 buf_si[0] = nrows; 4214 buf_si_i[0] = 0; 4215 nrows = 0; 4216 for (i=owners[proc]; i<owners[proc+1]; i++){ 4217 anzi = ai[i+1] - ai[i]; 4218 if (anzi) { 4219 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4220 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4221 nrows++; 4222 } 4223 } 4224 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4225 k++; 4226 buf_si += len_si[proc]; 4227 } 4228 4229 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4230 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4231 4232 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4233 for (i=0; i<merge->nrecv; i++){ 4234 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4235 } 4236 4237 ierr = PetscFree(len_si);CHKERRQ(ierr); 4238 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4239 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4240 ierr = PetscFree(si_waits);CHKERRQ(ierr); 4241 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4242 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4243 ierr = PetscFree(status);CHKERRQ(ierr); 4244 4245 /* compute a local seq matrix in each processor */ 4246 /*----------------------------------------------*/ 4247 /* allocate bi array and free space for accumulating nonzero column info */ 4248 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4249 bi[0] = 0; 4250 4251 /* create and initialize a linked list */ 4252 nlnk = N+1; 4253 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4254 4255 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4256 len = 0; 4257 len = ai[owners[rank+1]] - ai[owners[rank]]; 4258 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4259 current_space = free_space; 4260 4261 /* determine symbolic info for each local row */ 4262 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 4263 nextrow = buf_ri_k + merge->nrecv; 4264 nextai = nextrow + merge->nrecv; 4265 for (k=0; k<merge->nrecv; k++){ 4266 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4267 nrows = *buf_ri_k[k]; 4268 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4269 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4270 } 4271 4272 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4273 len = 0; 4274 for (i=0;i<m;i++) { 4275 bnzi = 0; 4276 /* add local non-zero cols of this proc's seqmat into lnk */ 4277 arow = owners[rank] + i; 4278 anzi = ai[arow+1] - ai[arow]; 4279 aj = a->j + ai[arow]; 4280 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4281 bnzi += nlnk; 4282 /* add received col data into lnk */ 4283 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4284 if (i == *nextrow[k]) { /* i-th row */ 4285 anzi = *(nextai[k]+1) - *nextai[k]; 4286 aj = buf_rj[k] + *nextai[k]; 4287 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4288 bnzi += nlnk; 4289 nextrow[k]++; nextai[k]++; 4290 } 4291 } 4292 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4293 4294 /* if free space is not available, make more free space */ 4295 if (current_space->local_remaining<bnzi) { 4296 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4297 nspacedouble++; 4298 } 4299 /* copy data into free space, then initialize lnk */ 4300 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4301 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4302 4303 current_space->array += bnzi; 4304 current_space->local_used += bnzi; 4305 current_space->local_remaining -= bnzi; 4306 4307 bi[i+1] = bi[i] + bnzi; 4308 } 4309 4310 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 4311 4312 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4313 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4314 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4315 4316 /* create symbolic parallel matrix B_mpi */ 4317 /*---------------------------------------*/ 4318 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4319 if (n==PETSC_DECIDE) { 4320 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4321 } else { 4322 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4323 } 4324 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4325 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4326 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4327 4328 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4329 B_mpi->assembled = PETSC_FALSE; 4330 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4331 merge->bi = bi; 4332 merge->bj = bj; 4333 merge->buf_ri = buf_ri; 4334 merge->buf_rj = buf_rj; 4335 merge->coi = PETSC_NULL; 4336 merge->coj = PETSC_NULL; 4337 merge->owners_co = PETSC_NULL; 4338 4339 /* attach the supporting struct to B_mpi for reuse */ 4340 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4341 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4342 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4343 *mpimat = B_mpi; 4344 4345 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4346 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4347 PetscFunctionReturn(0); 4348 } 4349 4350 #undef __FUNCT__ 4351 #define __FUNCT__ "MatMerge_SeqsToMPI" 4352 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4353 { 4354 PetscErrorCode ierr; 4355 4356 PetscFunctionBegin; 4357 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4358 if (scall == MAT_INITIAL_MATRIX){ 4359 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4360 } 4361 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4362 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4363 PetscFunctionReturn(0); 4364 } 4365 4366 #undef __FUNCT__ 4367 #define __FUNCT__ "MatGetLocalMat" 4368 /*@ 4369 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4370 4371 Not Collective 4372 4373 Input Parameters: 4374 + A - the matrix 4375 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4376 4377 Output Parameter: 4378 . A_loc - the local sequential matrix generated 4379 4380 Level: developer 4381 4382 @*/ 4383 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4384 { 4385 PetscErrorCode ierr; 4386 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4387 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4388 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4389 MatScalar *aa=a->a,*ba=b->a,*cam; 4390 PetscScalar *ca; 4391 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4392 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4393 4394 PetscFunctionBegin; 4395 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4396 if (scall == MAT_INITIAL_MATRIX){ 4397 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4398 ci[0] = 0; 4399 for (i=0; i<am; i++){ 4400 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4401 } 4402 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4403 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4404 k = 0; 4405 for (i=0; i<am; i++) { 4406 ncols_o = bi[i+1] - bi[i]; 4407 ncols_d = ai[i+1] - ai[i]; 4408 /* off-diagonal portion of A */ 4409 for (jo=0; jo<ncols_o; jo++) { 4410 col = cmap[*bj]; 4411 if (col >= cstart) break; 4412 cj[k] = col; bj++; 4413 ca[k++] = *ba++; 4414 } 4415 /* diagonal portion of A */ 4416 for (j=0; j<ncols_d; j++) { 4417 cj[k] = cstart + *aj++; 4418 ca[k++] = *aa++; 4419 } 4420 /* off-diagonal portion of A */ 4421 for (j=jo; j<ncols_o; j++) { 4422 cj[k] = cmap[*bj++]; 4423 ca[k++] = *ba++; 4424 } 4425 } 4426 /* put together the new matrix */ 4427 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4428 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4429 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4430 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4431 mat->free_a = PETSC_TRUE; 4432 mat->free_ij = PETSC_TRUE; 4433 mat->nonew = 0; 4434 } else if (scall == MAT_REUSE_MATRIX){ 4435 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4436 ci = mat->i; cj = mat->j; cam = mat->a; 4437 for (i=0; i<am; i++) { 4438 /* off-diagonal portion of A */ 4439 ncols_o = bi[i+1] - bi[i]; 4440 for (jo=0; jo<ncols_o; jo++) { 4441 col = cmap[*bj]; 4442 if (col >= cstart) break; 4443 *cam++ = *ba++; bj++; 4444 } 4445 /* diagonal portion of A */ 4446 ncols_d = ai[i+1] - ai[i]; 4447 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4448 /* off-diagonal portion of A */ 4449 for (j=jo; j<ncols_o; j++) { 4450 *cam++ = *ba++; bj++; 4451 } 4452 } 4453 } else { 4454 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4455 } 4456 4457 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4458 PetscFunctionReturn(0); 4459 } 4460 4461 #undef __FUNCT__ 4462 #define __FUNCT__ "MatGetLocalMatCondensed" 4463 /*@C 4464 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4465 4466 Not Collective 4467 4468 Input Parameters: 4469 + A - the matrix 4470 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4471 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4472 4473 Output Parameter: 4474 . A_loc - the local sequential matrix generated 4475 4476 Level: developer 4477 4478 @*/ 4479 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4480 { 4481 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4482 PetscErrorCode ierr; 4483 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4484 IS isrowa,iscola; 4485 Mat *aloc; 4486 4487 PetscFunctionBegin; 4488 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4489 if (!row){ 4490 start = A->rmap->rstart; end = A->rmap->rend; 4491 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4492 } else { 4493 isrowa = *row; 4494 } 4495 if (!col){ 4496 start = A->cmap->rstart; 4497 cmap = a->garray; 4498 nzA = a->A->cmap->n; 4499 nzB = a->B->cmap->n; 4500 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4501 ncols = 0; 4502 for (i=0; i<nzB; i++) { 4503 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4504 else break; 4505 } 4506 imark = i; 4507 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4508 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4509 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4510 ierr = PetscFree(idx);CHKERRQ(ierr); 4511 } else { 4512 iscola = *col; 4513 } 4514 if (scall != MAT_INITIAL_MATRIX){ 4515 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4516 aloc[0] = *A_loc; 4517 } 4518 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4519 *A_loc = aloc[0]; 4520 ierr = PetscFree(aloc);CHKERRQ(ierr); 4521 if (!row){ 4522 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4523 } 4524 if (!col){ 4525 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4526 } 4527 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4528 PetscFunctionReturn(0); 4529 } 4530 4531 #undef __FUNCT__ 4532 #define __FUNCT__ "MatGetBrowsOfAcols" 4533 /*@C 4534 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4535 4536 Collective on Mat 4537 4538 Input Parameters: 4539 + A,B - the matrices in mpiaij format 4540 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4541 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4542 4543 Output Parameter: 4544 + rowb, colb - index sets of rows and columns of B to extract 4545 . brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows 4546 - B_seq - the sequential matrix generated 4547 4548 Level: developer 4549 4550 @*/ 4551 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4552 { 4553 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4554 PetscErrorCode ierr; 4555 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4556 IS isrowb,iscolb; 4557 Mat *bseq; 4558 4559 PetscFunctionBegin; 4560 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4561 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4562 } 4563 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4564 4565 if (scall == MAT_INITIAL_MATRIX){ 4566 start = A->cmap->rstart; 4567 cmap = a->garray; 4568 nzA = a->A->cmap->n; 4569 nzB = a->B->cmap->n; 4570 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4571 ncols = 0; 4572 for (i=0; i<nzB; i++) { /* row < local row index */ 4573 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4574 else break; 4575 } 4576 imark = i; 4577 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4578 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4579 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4580 ierr = PetscFree(idx);CHKERRQ(ierr); 4581 *brstart = imark; 4582 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4583 } else { 4584 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4585 isrowb = *rowb; iscolb = *colb; 4586 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4587 bseq[0] = *B_seq; 4588 } 4589 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4590 *B_seq = bseq[0]; 4591 ierr = PetscFree(bseq);CHKERRQ(ierr); 4592 if (!rowb){ 4593 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4594 } else { 4595 *rowb = isrowb; 4596 } 4597 if (!colb){ 4598 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4599 } else { 4600 *colb = iscolb; 4601 } 4602 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4603 PetscFunctionReturn(0); 4604 } 4605 4606 #undef __FUNCT__ 4607 #define __FUNCT__ "MatGetBrowsOfAoCols" 4608 /*@C 4609 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4610 of the OFF-DIAGONAL portion of local A 4611 4612 Collective on Mat 4613 4614 Input Parameters: 4615 + A,B - the matrices in mpiaij format 4616 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4617 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4618 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4619 4620 Output Parameter: 4621 + B_oth - the sequential matrix generated 4622 4623 Level: developer 4624 4625 @*/ 4626 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth) 4627 { 4628 VecScatter_MPI_General *gen_to,*gen_from; 4629 PetscErrorCode ierr; 4630 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4631 Mat_SeqAIJ *b_oth; 4632 VecScatter ctx=a->Mvctx; 4633 MPI_Comm comm=((PetscObject)ctx)->comm; 4634 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4635 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4636 PetscScalar *rvalues,*svalues; 4637 MatScalar *b_otha,*bufa,*bufA; 4638 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4639 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4640 MPI_Status *sstatus,rstatus; 4641 PetscMPIInt jj; 4642 PetscInt *cols,sbs,rbs; 4643 PetscScalar *vals; 4644 4645 PetscFunctionBegin; 4646 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4647 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4648 } 4649 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4650 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4651 4652 gen_to = (VecScatter_MPI_General*)ctx->todata; 4653 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4654 rvalues = gen_from->values; /* holds the length of receiving row */ 4655 svalues = gen_to->values; /* holds the length of sending row */ 4656 nrecvs = gen_from->n; 4657 nsends = gen_to->n; 4658 4659 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4660 srow = gen_to->indices; /* local row index to be sent */ 4661 sstarts = gen_to->starts; 4662 sprocs = gen_to->procs; 4663 sstatus = gen_to->sstatus; 4664 sbs = gen_to->bs; 4665 rstarts = gen_from->starts; 4666 rprocs = gen_from->procs; 4667 rbs = gen_from->bs; 4668 4669 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4670 if (scall == MAT_INITIAL_MATRIX){ 4671 /* i-array */ 4672 /*---------*/ 4673 /* post receives */ 4674 for (i=0; i<nrecvs; i++){ 4675 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4676 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4677 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4678 } 4679 4680 /* pack the outgoing message */ 4681 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 4682 rstartsj = sstartsj + nsends +1; 4683 sstartsj[0] = 0; rstartsj[0] = 0; 4684 len = 0; /* total length of j or a array to be sent */ 4685 k = 0; 4686 for (i=0; i<nsends; i++){ 4687 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4688 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4689 for (j=0; j<nrows; j++) { 4690 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4691 for (l=0; l<sbs; l++){ 4692 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4693 rowlen[j*sbs+l] = ncols; 4694 len += ncols; 4695 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4696 } 4697 k++; 4698 } 4699 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4700 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4701 } 4702 /* recvs and sends of i-array are completed */ 4703 i = nrecvs; 4704 while (i--) { 4705 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4706 } 4707 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4708 4709 /* allocate buffers for sending j and a arrays */ 4710 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4711 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4712 4713 /* create i-array of B_oth */ 4714 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4715 b_othi[0] = 0; 4716 len = 0; /* total length of j or a array to be received */ 4717 k = 0; 4718 for (i=0; i<nrecvs; i++){ 4719 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4720 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 4721 for (j=0; j<nrows; j++) { 4722 b_othi[k+1] = b_othi[k] + rowlen[j]; 4723 len += rowlen[j]; k++; 4724 } 4725 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4726 } 4727 4728 /* allocate space for j and a arrrays of B_oth */ 4729 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4730 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 4731 4732 /* j-array */ 4733 /*---------*/ 4734 /* post receives of j-array */ 4735 for (i=0; i<nrecvs; i++){ 4736 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4737 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4738 } 4739 4740 /* pack the outgoing message j-array */ 4741 k = 0; 4742 for (i=0; i<nsends; i++){ 4743 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4744 bufJ = bufj+sstartsj[i]; 4745 for (j=0; j<nrows; j++) { 4746 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4747 for (ll=0; ll<sbs; ll++){ 4748 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4749 for (l=0; l<ncols; l++){ 4750 *bufJ++ = cols[l]; 4751 } 4752 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4753 } 4754 } 4755 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4756 } 4757 4758 /* recvs and sends of j-array are completed */ 4759 i = nrecvs; 4760 while (i--) { 4761 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4762 } 4763 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4764 } else if (scall == MAT_REUSE_MATRIX){ 4765 sstartsj = *startsj; 4766 rstartsj = sstartsj + nsends +1; 4767 bufa = *bufa_ptr; 4768 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4769 b_otha = b_oth->a; 4770 } else { 4771 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 4772 } 4773 4774 /* a-array */ 4775 /*---------*/ 4776 /* post receives of a-array */ 4777 for (i=0; i<nrecvs; i++){ 4778 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4779 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4780 } 4781 4782 /* pack the outgoing message a-array */ 4783 k = 0; 4784 for (i=0; i<nsends; i++){ 4785 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4786 bufA = bufa+sstartsj[i]; 4787 for (j=0; j<nrows; j++) { 4788 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4789 for (ll=0; ll<sbs; ll++){ 4790 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4791 for (l=0; l<ncols; l++){ 4792 *bufA++ = vals[l]; 4793 } 4794 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4795 } 4796 } 4797 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4798 } 4799 /* recvs and sends of a-array are completed */ 4800 i = nrecvs; 4801 while (i--) { 4802 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4803 } 4804 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4805 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4806 4807 if (scall == MAT_INITIAL_MATRIX){ 4808 /* put together the new matrix */ 4809 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4810 4811 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4812 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4813 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4814 b_oth->free_a = PETSC_TRUE; 4815 b_oth->free_ij = PETSC_TRUE; 4816 b_oth->nonew = 0; 4817 4818 ierr = PetscFree(bufj);CHKERRQ(ierr); 4819 if (!startsj || !bufa_ptr){ 4820 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 4821 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4822 } else { 4823 *startsj = sstartsj; 4824 *bufa_ptr = bufa; 4825 } 4826 } 4827 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4828 PetscFunctionReturn(0); 4829 } 4830 4831 #undef __FUNCT__ 4832 #define __FUNCT__ "MatGetCommunicationStructs" 4833 /*@C 4834 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4835 4836 Not Collective 4837 4838 Input Parameters: 4839 . A - The matrix in mpiaij format 4840 4841 Output Parameter: 4842 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4843 . colmap - A map from global column index to local index into lvec 4844 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4845 4846 Level: developer 4847 4848 @*/ 4849 #if defined (PETSC_USE_CTABLE) 4850 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4851 #else 4852 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4853 #endif 4854 { 4855 Mat_MPIAIJ *a; 4856 4857 PetscFunctionBegin; 4858 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 4859 PetscValidPointer(lvec, 2) 4860 PetscValidPointer(colmap, 3) 4861 PetscValidPointer(multScatter, 4) 4862 a = (Mat_MPIAIJ *) A->data; 4863 if (lvec) *lvec = a->lvec; 4864 if (colmap) *colmap = a->colmap; 4865 if (multScatter) *multScatter = a->Mvctx; 4866 PetscFunctionReturn(0); 4867 } 4868 4869 EXTERN_C_BEGIN 4870 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*); 4871 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*); 4872 EXTERN_C_END 4873 4874 #include "../src/mat/impls/dense/mpi/mpidense.h" 4875 4876 #undef __FUNCT__ 4877 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4878 /* 4879 Computes (B'*A')' since computing B*A directly is untenable 4880 4881 n p p 4882 ( ) ( ) ( ) 4883 m ( A ) * n ( B ) = m ( C ) 4884 ( ) ( ) ( ) 4885 4886 */ 4887 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4888 { 4889 PetscErrorCode ierr; 4890 Mat At,Bt,Ct; 4891 4892 PetscFunctionBegin; 4893 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4894 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4895 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4896 ierr = MatDestroy(At);CHKERRQ(ierr); 4897 ierr = MatDestroy(Bt);CHKERRQ(ierr); 4898 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4899 ierr = MatDestroy(Ct);CHKERRQ(ierr); 4900 PetscFunctionReturn(0); 4901 } 4902 4903 #undef __FUNCT__ 4904 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4905 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4906 { 4907 PetscErrorCode ierr; 4908 PetscInt m=A->rmap->n,n=B->cmap->n; 4909 Mat Cmat; 4910 4911 PetscFunctionBegin; 4912 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 4913 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 4914 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4915 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 4916 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 4917 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4918 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4919 *C = Cmat; 4920 PetscFunctionReturn(0); 4921 } 4922 4923 /* ----------------------------------------------------------------*/ 4924 #undef __FUNCT__ 4925 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 4926 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 4927 { 4928 PetscErrorCode ierr; 4929 4930 PetscFunctionBegin; 4931 if (scall == MAT_INITIAL_MATRIX){ 4932 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 4933 } 4934 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 4935 PetscFunctionReturn(0); 4936 } 4937 4938 EXTERN_C_BEGIN 4939 #if defined(PETSC_HAVE_MUMPS) 4940 extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*); 4941 #endif 4942 #if defined(PETSC_HAVE_PASTIX) 4943 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 4944 #endif 4945 #if defined(PETSC_HAVE_SUPERLU_DIST) 4946 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 4947 #endif 4948 #if defined(PETSC_HAVE_SPOOLES) 4949 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 4950 #endif 4951 EXTERN_C_END 4952 4953 /*MC 4954 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 4955 4956 Options Database Keys: 4957 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 4958 4959 Level: beginner 4960 4961 .seealso: MatCreateMPIAIJ() 4962 M*/ 4963 4964 EXTERN_C_BEGIN 4965 #undef __FUNCT__ 4966 #define __FUNCT__ "MatCreate_MPIAIJ" 4967 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 4968 { 4969 Mat_MPIAIJ *b; 4970 PetscErrorCode ierr; 4971 PetscMPIInt size; 4972 4973 PetscFunctionBegin; 4974 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 4975 4976 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 4977 B->data = (void*)b; 4978 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 4979 B->rmap->bs = 1; 4980 B->assembled = PETSC_FALSE; 4981 B->mapping = 0; 4982 4983 B->insertmode = NOT_SET_VALUES; 4984 b->size = size; 4985 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 4986 4987 /* build cache for off array entries formed */ 4988 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 4989 b->donotstash = PETSC_FALSE; 4990 b->colmap = 0; 4991 b->garray = 0; 4992 b->roworiented = PETSC_TRUE; 4993 4994 /* stuff used for matrix vector multiply */ 4995 b->lvec = PETSC_NULL; 4996 b->Mvctx = PETSC_NULL; 4997 4998 /* stuff for MatGetRow() */ 4999 b->rowindices = 0; 5000 b->rowvalues = 0; 5001 b->getrowactive = PETSC_FALSE; 5002 5003 #if defined(PETSC_HAVE_SPOOLES) 5004 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_spooles_C", 5005 "MatGetFactor_mpiaij_spooles", 5006 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5007 #endif 5008 #if defined(PETSC_HAVE_MUMPS) 5009 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_mumps_C", 5010 "MatGetFactor_mpiaij_mumps", 5011 MatGetFactor_mpiaij_mumps);CHKERRQ(ierr); 5012 #endif 5013 #if defined(PETSC_HAVE_PASTIX) 5014 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_pastix_C", 5015 "MatGetFactor_mpiaij_pastix", 5016 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5017 #endif 5018 #if defined(PETSC_HAVE_SUPERLU_DIST) 5019 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_superlu_dist_C", 5020 "MatGetFactor_mpiaij_superlu_dist", 5021 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5022 #endif 5023 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5024 "MatStoreValues_MPIAIJ", 5025 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5026 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5027 "MatRetrieveValues_MPIAIJ", 5028 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5029 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5030 "MatGetDiagonalBlock_MPIAIJ", 5031 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5032 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5033 "MatIsTranspose_MPIAIJ", 5034 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5035 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5036 "MatMPIAIJSetPreallocation_MPIAIJ", 5037 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5038 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5039 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5040 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5041 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5042 "MatDiagonalScaleLocal_MPIAIJ", 5043 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5044 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C", 5045 "MatConvert_MPIAIJ_MPICSRPERM", 5046 MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr); 5047 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C", 5048 "MatConvert_MPIAIJ_MPICRL", 5049 MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr); 5050 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5051 "MatMatMult_MPIDense_MPIAIJ", 5052 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5053 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5054 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5055 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5056 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5057 "MatMatMultNumeric_MPIDense_MPIAIJ", 5058 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5059 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5060 PetscFunctionReturn(0); 5061 } 5062 EXTERN_C_END 5063 5064 #undef __FUNCT__ 5065 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5066 /*@ 5067 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5068 and "off-diagonal" part of the matrix in CSR format. 5069 5070 Collective on MPI_Comm 5071 5072 Input Parameters: 5073 + comm - MPI communicator 5074 . m - number of local rows (Cannot be PETSC_DECIDE) 5075 . n - This value should be the same as the local size used in creating the 5076 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5077 calculated if N is given) For square matrices n is almost always m. 5078 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5079 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5080 . i - row indices for "diagonal" portion of matrix 5081 . j - column indices 5082 . a - matrix values 5083 . oi - row indices for "off-diagonal" portion of matrix 5084 . oj - column indices 5085 - oa - matrix values 5086 5087 Output Parameter: 5088 . mat - the matrix 5089 5090 Level: advanced 5091 5092 Notes: 5093 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 5094 5095 The i and j indices are 0 based 5096 5097 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5098 5099 5100 .keywords: matrix, aij, compressed row, sparse, parallel 5101 5102 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5103 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5104 @*/ 5105 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5106 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5107 { 5108 PetscErrorCode ierr; 5109 Mat_MPIAIJ *maij; 5110 5111 PetscFunctionBegin; 5112 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5113 if (i[0]) { 5114 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5115 } 5116 if (oi[0]) { 5117 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5118 } 5119 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5120 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5121 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5122 maij = (Mat_MPIAIJ*) (*mat)->data; 5123 maij->donotstash = PETSC_TRUE; 5124 (*mat)->preallocated = PETSC_TRUE; 5125 5126 ierr = PetscMapSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5127 ierr = PetscMapSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5128 ierr = PetscMapSetUp((*mat)->rmap);CHKERRQ(ierr); 5129 ierr = PetscMapSetUp((*mat)->cmap);CHKERRQ(ierr); 5130 5131 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5132 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5133 5134 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5135 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5136 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5137 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5138 5139 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5140 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5141 PetscFunctionReturn(0); 5142 } 5143 5144 /* 5145 Special version for direct calls from Fortran 5146 */ 5147 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5148 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5149 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5150 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5151 #endif 5152 5153 /* Change these macros so can be used in void function */ 5154 #undef CHKERRQ 5155 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5156 #undef SETERRQ2 5157 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5158 #undef SETERRQ 5159 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5160 5161 EXTERN_C_BEGIN 5162 #undef __FUNCT__ 5163 #define __FUNCT__ "matsetvaluesmpiaij_" 5164 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5165 { 5166 Mat mat = *mmat; 5167 PetscInt m = *mm, n = *mn; 5168 InsertMode addv = *maddv; 5169 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5170 PetscScalar value; 5171 PetscErrorCode ierr; 5172 5173 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5174 if (mat->insertmode == NOT_SET_VALUES) { 5175 mat->insertmode = addv; 5176 } 5177 #if defined(PETSC_USE_DEBUG) 5178 else if (mat->insertmode != addv) { 5179 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5180 } 5181 #endif 5182 { 5183 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5184 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5185 PetscTruth roworiented = aij->roworiented; 5186 5187 /* Some Variables required in the macro */ 5188 Mat A = aij->A; 5189 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5190 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5191 MatScalar *aa = a->a; 5192 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5193 Mat B = aij->B; 5194 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5195 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5196 MatScalar *ba = b->a; 5197 5198 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5199 PetscInt nonew = a->nonew; 5200 MatScalar *ap1,*ap2; 5201 5202 PetscFunctionBegin; 5203 for (i=0; i<m; i++) { 5204 if (im[i] < 0) continue; 5205 #if defined(PETSC_USE_DEBUG) 5206 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5207 #endif 5208 if (im[i] >= rstart && im[i] < rend) { 5209 row = im[i] - rstart; 5210 lastcol1 = -1; 5211 rp1 = aj + ai[row]; 5212 ap1 = aa + ai[row]; 5213 rmax1 = aimax[row]; 5214 nrow1 = ailen[row]; 5215 low1 = 0; 5216 high1 = nrow1; 5217 lastcol2 = -1; 5218 rp2 = bj + bi[row]; 5219 ap2 = ba + bi[row]; 5220 rmax2 = bimax[row]; 5221 nrow2 = bilen[row]; 5222 low2 = 0; 5223 high2 = nrow2; 5224 5225 for (j=0; j<n; j++) { 5226 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5227 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5228 if (in[j] >= cstart && in[j] < cend){ 5229 col = in[j] - cstart; 5230 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5231 } else if (in[j] < 0) continue; 5232 #if defined(PETSC_USE_DEBUG) 5233 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 5234 #endif 5235 else { 5236 if (mat->was_assembled) { 5237 if (!aij->colmap) { 5238 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5239 } 5240 #if defined (PETSC_USE_CTABLE) 5241 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5242 col--; 5243 #else 5244 col = aij->colmap[in[j]] - 1; 5245 #endif 5246 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5247 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5248 col = in[j]; 5249 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5250 B = aij->B; 5251 b = (Mat_SeqAIJ*)B->data; 5252 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5253 rp2 = bj + bi[row]; 5254 ap2 = ba + bi[row]; 5255 rmax2 = bimax[row]; 5256 nrow2 = bilen[row]; 5257 low2 = 0; 5258 high2 = nrow2; 5259 bm = aij->B->rmap->n; 5260 ba = b->a; 5261 } 5262 } else col = in[j]; 5263 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5264 } 5265 } 5266 } else { 5267 if (!aij->donotstash) { 5268 if (roworiented) { 5269 if (ignorezeroentries && v[i*n] == 0.0) continue; 5270 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 5271 } else { 5272 if (ignorezeroentries && v[i] == 0.0) continue; 5273 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 5274 } 5275 } 5276 } 5277 }} 5278 PetscFunctionReturnVoid(); 5279 } 5280 EXTERN_C_END 5281 5282