1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 4 #include "../src/inline/spops.h" 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "MatDistribute_MPIAIJ" 8 /* 9 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 10 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 11 12 Only for square matrices 13 */ 14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 15 { 16 PetscMPIInt rank,size; 17 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 18 PetscErrorCode ierr; 19 Mat mat; 20 Mat_SeqAIJ *gmata; 21 PetscMPIInt tag; 22 MPI_Status status; 23 PetscTruth aij; 24 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 25 26 PetscFunctionBegin; 27 CHKMEMQ; 28 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 29 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 30 if (!rank) { 31 ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 32 if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 33 } 34 if (reuse == MAT_INITIAL_MATRIX) { 35 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 36 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 37 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 38 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 39 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 40 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 41 rowners[0] = 0; 42 for (i=2; i<=size; i++) { 43 rowners[i] += rowners[i-1]; 44 } 45 rstart = rowners[rank]; 46 rend = rowners[rank+1]; 47 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 48 if (!rank) { 49 gmata = (Mat_SeqAIJ*) gmat->data; 50 /* send row lengths to all processors */ 51 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 52 for (i=1; i<size; i++) { 53 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 54 } 55 /* determine number diagonal and off-diagonal counts */ 56 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 57 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 58 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 59 jj = 0; 60 for (i=0; i<m; i++) { 61 for (j=0; j<dlens[i]; j++) { 62 if (gmata->j[jj] < rstart) ld[i]++; 63 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 64 jj++; 65 } 66 } 67 /* send column indices to other processes */ 68 for (i=1; i<size; i++) { 69 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 70 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 71 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 72 } 73 74 /* send numerical values to other processes */ 75 for (i=1; i<size; i++) { 76 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 77 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 78 } 79 gmataa = gmata->a; 80 gmataj = gmata->j; 81 82 } else { 83 /* receive row lengths */ 84 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 85 /* receive column indices */ 86 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 87 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 88 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 89 /* determine number diagonal and off-diagonal counts */ 90 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 91 
ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 92 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 93 jj = 0; 94 for (i=0; i<m; i++) { 95 for (j=0; j<dlens[i]; j++) { 96 if (gmataj[jj] < rstart) ld[i]++; 97 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 98 jj++; 99 } 100 } 101 /* receive numerical values */ 102 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 104 } 105 /* set preallocation */ 106 for (i=0; i<m; i++) { 107 dlens[i] -= olens[i]; 108 } 109 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 110 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 111 112 for (i=0; i<m; i++) { 113 dlens[i] += olens[i]; 114 } 115 cnt = 0; 116 for (i=0; i<m; i++) { 117 row = rstart + i; 118 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 119 cnt += dlens[i]; 120 } 121 if (rank) { 122 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 123 } 124 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 125 ierr = PetscFree(rowners);CHKERRQ(ierr); 126 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 127 *inmat = mat; 128 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 129 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 130 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 131 mat = *inmat; 132 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 133 if (!rank) { 134 /* send numerical values to other processes */ 135 gmata = (Mat_SeqAIJ*) gmat->data; 136 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 137 gmataa = gmata->a; 138 for (i=1; i<size; i++) { 139 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 140 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 141 } 142 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 143 } else { 144 /* receive numerical values from process 0*/ 145 nz = Ad->nz + Ao->nz; 146 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 147 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 148 } 149 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 150 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 151 ad = Ad->a; 152 ao = Ao->a; 153 if (mat->rmap->n) { 154 i = 0; 155 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 156 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 157 } 158 for (i=1; i<mat->rmap->n; i++) { 159 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 160 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 161 } 162 i--; 163 if (mat->rmap->n) { 164 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 165 } 166 if (rank) { 167 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 168 } 169 } 170 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 171 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 172 CHKMEMQ; 173 PetscFunctionReturn(0); 174 } 175 176 /* 177 Local utility routine that creates a mapping from the global column 178 number to the local number in the off-diagonal 
part of the local 179 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 180 a slightly higher hash table cost; without it it is not scalable (each processor 181 has an order N integer array but is fast to acess. 182 */ 183 #undef __FUNCT__ 184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 186 { 187 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 188 PetscErrorCode ierr; 189 PetscInt n = aij->B->cmap->n,i; 190 191 PetscFunctionBegin; 192 #if defined (PETSC_USE_CTABLE) 193 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 194 for (i=0; i<n; i++){ 195 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 196 } 197 #else 198 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 199 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 200 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 201 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 202 #endif 203 PetscFunctionReturn(0); 204 } 205 206 207 #define CHUNKSIZE 15 208 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 209 { \ 210 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 211 lastcol1 = col;\ 212 while (high1-low1 > 5) { \ 213 t = (low1+high1)/2; \ 214 if (rp1[t] > col) high1 = t; \ 215 else low1 = t; \ 216 } \ 217 for (_i=low1; _i<high1; _i++) { \ 218 if (rp1[_i] > col) break; \ 219 if (rp1[_i] == col) { \ 220 if (addv == ADD_VALUES) ap1[_i] += value; \ 221 else ap1[_i] = value; \ 222 goto a_noinsert; \ 223 } \ 224 } \ 225 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 226 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 227 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 228 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 229 N = nrow1++ - 1; a->nz++; high1++; \ 230 /* shift up all the later entries in this row */ \ 231 for (ii=N; ii>=_i; ii--) { \ 232 rp1[ii+1] = rp1[ii]; \ 233 ap1[ii+1] = ap1[ii]; \ 234 } \ 235 rp1[_i] = col; \ 236 ap1[_i] = value; \ 237 a_noinsert: ; \ 238 ailen[row] = nrow1; \ 239 } 240 241 242 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 243 { \ 244 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 245 lastcol2 = col;\ 246 while (high2-low2 > 5) { \ 247 t = (low2+high2)/2; \ 248 if (rp2[t] > col) high2 = t; \ 249 else low2 = t; \ 250 } \ 251 for (_i=low2; _i<high2; _i++) { \ 252 if (rp2[_i] > col) break; \ 253 if (rp2[_i] == col) { \ 254 if (addv == ADD_VALUES) ap2[_i] += value; \ 255 else ap2[_i] = value; \ 256 goto b_noinsert; \ 257 } \ 258 } \ 259 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 260 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 261 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 262 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 263 N = nrow2++ - 1; b->nz++; high2++; \ 264 /* shift up all the later entries in this row */ \ 265 for (ii=N; ii>=_i; ii--) { \ 266 rp2[ii+1] = rp2[ii]; \ 267 ap2[ii+1] = ap2[ii]; \ 268 } \ 269 rp2[_i] = col; \ 270 ap2[_i] = value; \ 271 b_noinsert: ; \ 272 bilen[row] = nrow2; \ 273 } 274 275 #undef __FUNCT__ 276 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 277 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 278 
{ 279 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 280 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 281 PetscErrorCode ierr; 282 PetscInt l,*garray = mat->garray,diag; 283 284 PetscFunctionBegin; 285 /* code only works for square matrices A */ 286 287 /* find size of row to the left of the diagonal part */ 288 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 289 row = row - diag; 290 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 291 if (garray[b->j[b->i[row]+l]] > diag) break; 292 } 293 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 294 295 /* diagonal part */ 296 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 297 298 /* right of diagonal part */ 299 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 300 PetscFunctionReturn(0); 301 } 302 303 #undef __FUNCT__ 304 #define __FUNCT__ "MatSetValues_MPIAIJ" 305 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 306 { 307 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 308 PetscScalar value; 309 PetscErrorCode ierr; 310 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 311 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 312 PetscTruth roworiented = aij->roworiented; 313 314 /* Some Variables required in the macro */ 315 Mat A = aij->A; 316 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 317 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 318 MatScalar *aa = a->a; 319 PetscTruth ignorezeroentries = a->ignorezeroentries; 320 Mat B = aij->B; 321 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 322 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 323 MatScalar *ba = b->a; 324 325 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 326 PetscInt nonew = a->nonew; 327 MatScalar *ap1,*ap2; 328 329 PetscFunctionBegin; 330 for (i=0; i<m; i++) { 331 if (im[i] < 0) continue; 332 #if defined(PETSC_USE_DEBUG) 333 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 334 #endif 335 if (im[i] >= rstart && im[i] < rend) { 336 row = im[i] - rstart; 337 lastcol1 = -1; 338 rp1 = aj + ai[row]; 339 ap1 = aa + ai[row]; 340 rmax1 = aimax[row]; 341 nrow1 = ailen[row]; 342 low1 = 0; 343 high1 = nrow1; 344 lastcol2 = -1; 345 rp2 = bj + bi[row]; 346 ap2 = ba + bi[row]; 347 rmax2 = bimax[row]; 348 nrow2 = bilen[row]; 349 low2 = 0; 350 high2 = nrow2; 351 352 for (j=0; j<n; j++) { 353 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 354 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 355 if (in[j] >= cstart && in[j] < cend){ 356 col = in[j] - cstart; 357 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 358 } else if (in[j] < 0) continue; 359 #if defined(PETSC_USE_DEBUG) 360 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 361 #endif 362 else { 363 if (mat->was_assembled) { 364 if (!aij->colmap) { 365 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 366 } 367 #if defined (PETSC_USE_CTABLE) 368 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 369 col--; 370 #else 371 col = aij->colmap[in[j]] - 1; 372 #endif 373 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 374 ierr 
= DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 375 col = in[j]; 376 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 377 B = aij->B; 378 b = (Mat_SeqAIJ*)B->data; 379 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 380 rp2 = bj + bi[row]; 381 ap2 = ba + bi[row]; 382 rmax2 = bimax[row]; 383 nrow2 = bilen[row]; 384 low2 = 0; 385 high2 = nrow2; 386 bm = aij->B->rmap->n; 387 ba = b->a; 388 } 389 } else col = in[j]; 390 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 391 } 392 } 393 } else { 394 if (!aij->donotstash) { 395 if (roworiented) { 396 if (ignorezeroentries && v[i*n] == 0.0) continue; 397 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 398 } else { 399 if (ignorezeroentries && v[i] == 0.0) continue; 400 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 401 } 402 } 403 } 404 } 405 PetscFunctionReturn(0); 406 } 407 408 #undef __FUNCT__ 409 #define __FUNCT__ "MatGetValues_MPIAIJ" 410 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 411 { 412 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 413 PetscErrorCode ierr; 414 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 415 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 416 417 PetscFunctionBegin; 418 for (i=0; i<m; i++) { 419 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 420 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 421 if (idxm[i] >= rstart && idxm[i] < rend) { 422 row = idxm[i] - rstart; 423 for (j=0; j<n; j++) { 424 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 425 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 426 if (idxn[j] >= cstart && idxn[j] < cend){ 427 col = idxn[j] - cstart; 428 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 429 } else { 430 if (!aij->colmap) { 431 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 432 } 433 #if defined (PETSC_USE_CTABLE) 434 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 435 col --; 436 #else 437 col = aij->colmap[idxn[j]] - 1; 438 #endif 439 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 440 else { 441 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 442 } 443 } 444 } 445 } else { 446 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 447 } 448 } 449 PetscFunctionReturn(0); 450 } 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 455 { 456 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 457 PetscErrorCode ierr; 458 PetscInt nstash,reallocs; 459 InsertMode addv; 460 461 PetscFunctionBegin; 462 if (aij->donotstash) { 463 PetscFunctionReturn(0); 464 } 465 466 /* make sure all processors are either in INSERTMODE or ADDMODE */ 467 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 468 if (addv == (ADD_VALUES|INSERT_VALUES)) { 469 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 470 } 471 mat->insertmode = addv; /* in case this processor had no cache */ 472 473 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 474 ierr = 
MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 475 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 476 PetscFunctionReturn(0); 477 } 478 479 #undef __FUNCT__ 480 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 481 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 482 { 483 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 484 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 485 PetscErrorCode ierr; 486 PetscMPIInt n; 487 PetscInt i,j,rstart,ncols,flg; 488 PetscInt *row,*col; 489 PetscTruth other_disassembled; 490 PetscScalar *val; 491 InsertMode addv = mat->insertmode; 492 493 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 494 PetscFunctionBegin; 495 if (!aij->donotstash) { 496 while (1) { 497 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 498 if (!flg) break; 499 500 for (i=0; i<n;) { 501 /* Now identify the consecutive vals belonging to the same row */ 502 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 503 if (j < n) ncols = j-i; 504 else ncols = n-i; 505 /* Now assemble all these values with a single function call */ 506 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 507 i = j; 508 } 509 } 510 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 511 } 512 a->compressedrow.use = PETSC_FALSE; 513 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 514 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 515 516 /* determine if any processor has disassembled, if so we must 517 also disassemble ourselfs, in order that we may reassemble. */ 518 /* 519 if nonzero structure of submatrix B cannot change then we know that 520 no processor disassembled thus we can skip this stuff 521 */ 522 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 523 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 524 if (mat->was_assembled && !other_disassembled) { 525 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 526 } 527 } 528 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 529 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 530 } 531 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 532 ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 533 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 534 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 535 536 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 537 aij->rowvalues = 0; 538 539 /* used by MatAXPY() */ 540 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 541 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 542 543 PetscFunctionReturn(0); 544 } 545 546 #undef __FUNCT__ 547 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 548 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 549 { 550 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 551 PetscErrorCode ierr; 552 553 PetscFunctionBegin; 554 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 555 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 556 PetscFunctionReturn(0); 557 } 558 559 #undef __FUNCT__ 560 #define __FUNCT__ "MatZeroRows_MPIAIJ" 561 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 562 { 563 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 564 PetscErrorCode ierr; 565 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 566 PetscInt i,*owners = A->rmap->range; 567 PetscInt 
*nprocs,j,idx,nsends,row; 568 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 569 PetscInt *rvalues,count,base,slen,*source; 570 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 571 MPI_Comm comm = ((PetscObject)A)->comm; 572 MPI_Request *send_waits,*recv_waits; 573 MPI_Status recv_status,*send_status; 574 #if defined(PETSC_DEBUG) 575 PetscTruth found = PETSC_FALSE; 576 #endif 577 578 PetscFunctionBegin; 579 /* first count number of contributors to each processor */ 580 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 581 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 582 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 583 j = 0; 584 for (i=0; i<N; i++) { 585 if (lastidx > (idx = rows[i])) j = 0; 586 lastidx = idx; 587 for (; j<size; j++) { 588 if (idx >= owners[j] && idx < owners[j+1]) { 589 nprocs[2*j]++; 590 nprocs[2*j+1] = 1; 591 owner[i] = j; 592 #if defined(PETSC_DEBUG) 593 found = PETSC_TRUE; 594 #endif 595 break; 596 } 597 } 598 #if defined(PETSC_DEBUG) 599 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 600 found = PETSC_FALSE; 601 #endif 602 } 603 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 604 605 /* inform other processors of number of messages and max length*/ 606 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 607 608 /* post receives: */ 609 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 610 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 611 for (i=0; i<nrecvs; i++) { 612 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 613 } 614 615 /* do sends: 616 1) starts[i] gives the starting index in svalues for stuff going to 617 the ith processor 618 */ 619 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 620 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 621 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 622 starts[0] = 0; 623 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 624 for (i=0; i<N; i++) { 625 svalues[starts[owner[i]]++] = rows[i]; 626 } 627 628 starts[0] = 0; 629 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 630 count = 0; 631 for (i=0; i<size; i++) { 632 if (nprocs[2*i+1]) { 633 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 634 } 635 } 636 ierr = PetscFree(starts);CHKERRQ(ierr); 637 638 base = owners[rank]; 639 640 /* wait on receives */ 641 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 642 source = lens + nrecvs; 643 count = nrecvs; slen = 0; 644 while (count) { 645 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 646 /* unpack receives into our local space */ 647 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 648 source[imdex] = recv_status.MPI_SOURCE; 649 lens[imdex] = n; 650 slen += n; 651 count--; 652 } 653 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 654 655 /* move the data into the send scatter */ 656 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 657 count = 0; 658 for (i=0; i<nrecvs; i++) { 659 values = rvalues + i*nmax; 660 for (j=0; j<lens[i]; j++) { 661 lrows[count++] = values[j] - base; 662 } 663 } 664 ierr = PetscFree(rvalues);CHKERRQ(ierr); 665 ierr = PetscFree(lens);CHKERRQ(ierr); 666 ierr = PetscFree(owner);CHKERRQ(ierr); 667 ierr = PetscFree(nprocs);CHKERRQ(ierr); 668 
669 /* actually zap the local rows */ 670 /* 671 Zero the required rows. If the "diagonal block" of the matrix 672 is square and the user wishes to set the diagonal we use separate 673 code so that MatSetValues() is not called for each diagonal allocating 674 new memory, thus calling lots of mallocs and slowing things down. 675 676 Contributed by: Matthew Knepley 677 */ 678 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 679 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 680 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 681 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 682 } else if (diag != 0.0) { 683 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 684 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 685 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 686 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 687 } 688 for (i = 0; i < slen; i++) { 689 row = lrows[i] + rstart; 690 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 691 } 692 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 693 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 694 } else { 695 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 696 } 697 ierr = PetscFree(lrows);CHKERRQ(ierr); 698 699 /* wait on sends */ 700 if (nsends) { 701 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 702 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 703 ierr = PetscFree(send_status);CHKERRQ(ierr); 704 } 705 ierr = PetscFree(send_waits);CHKERRQ(ierr); 706 ierr = PetscFree(svalues);CHKERRQ(ierr); 707 708 PetscFunctionReturn(0); 709 } 710 711 #undef __FUNCT__ 712 #define __FUNCT__ "MatMult_MPIAIJ" 713 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 714 { 715 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 716 PetscErrorCode ierr; 717 PetscInt nt; 718 719 PetscFunctionBegin; 720 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 721 if (nt != A->cmap->n) { 722 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 723 } 724 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 725 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 726 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 727 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 728 PetscFunctionReturn(0); 729 } 730 731 #undef __FUNCT__ 732 #define __FUNCT__ "MatMultAdd_MPIAIJ" 733 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 734 { 735 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 736 PetscErrorCode ierr; 737 738 PetscFunctionBegin; 739 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 740 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 741 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 742 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 743 PetscFunctionReturn(0); 744 } 745 746 #undef __FUNCT__ 747 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 748 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 749 { 750 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 751 PetscErrorCode ierr; 752 PetscTruth merged; 753 754 PetscFunctionBegin; 755 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 756 /* do nondiagonal part */ 757 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 758 if (!merged) { 759 /* send it on 
its way */ 760 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 761 /* do local part */ 762 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 763 /* receive remote parts: note this assumes the values are not actually */ 764 /* added in yy until the next line, */ 765 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 766 } else { 767 /* do local part */ 768 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 769 /* send it on its way */ 770 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 771 /* values actually were received in the Begin() but we need to call this nop */ 772 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 773 } 774 PetscFunctionReturn(0); 775 } 776 777 EXTERN_C_BEGIN 778 #undef __FUNCT__ 779 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 780 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 781 { 782 MPI_Comm comm; 783 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 784 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 785 IS Me,Notme; 786 PetscErrorCode ierr; 787 PetscInt M,N,first,last,*notme,i; 788 PetscMPIInt size; 789 790 PetscFunctionBegin; 791 792 /* Easy test: symmetric diagonal block */ 793 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 794 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 795 if (!*f) PetscFunctionReturn(0); 796 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 797 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 798 if (size == 1) PetscFunctionReturn(0); 799 800 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 801 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 802 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 803 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 804 for (i=0; i<first; i++) notme[i] = i; 805 for (i=last; i<M; i++) notme[i-last+first] = i; 806 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 807 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 808 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 809 Aoff = Aoffs[0]; 810 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 811 Boff = Boffs[0]; 812 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 813 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 814 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 815 ierr = ISDestroy(Me);CHKERRQ(ierr); 816 ierr = ISDestroy(Notme);CHKERRQ(ierr); 817 818 PetscFunctionReturn(0); 819 } 820 EXTERN_C_END 821 822 #undef __FUNCT__ 823 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 824 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 825 { 826 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 827 PetscErrorCode ierr; 828 829 PetscFunctionBegin; 830 /* do nondiagonal part */ 831 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 832 /* send it on its way */ 833 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 834 /* do local part */ 835 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 836 /* receive remote parts */ 837 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 838 PetscFunctionReturn(0); 839 } 840 841 /* 842 This only works correctly for square matrices where the subblock A->A is the 843 diagonal block 844 */ 845 #undef 
__FUNCT__ 846 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 847 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 848 { 849 PetscErrorCode ierr; 850 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 851 852 PetscFunctionBegin; 853 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 854 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) { 855 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 856 } 857 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 858 PetscFunctionReturn(0); 859 } 860 861 #undef __FUNCT__ 862 #define __FUNCT__ "MatScale_MPIAIJ" 863 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 864 { 865 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 866 PetscErrorCode ierr; 867 868 PetscFunctionBegin; 869 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 870 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 871 PetscFunctionReturn(0); 872 } 873 874 #undef __FUNCT__ 875 #define __FUNCT__ "MatDestroy_MPIAIJ" 876 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 877 { 878 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 879 PetscErrorCode ierr; 880 881 PetscFunctionBegin; 882 #if defined(PETSC_USE_LOG) 883 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 884 #endif 885 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 886 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 887 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 888 #if defined (PETSC_USE_CTABLE) 889 if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);} 890 #else 891 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 892 #endif 893 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 894 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 895 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 896 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 897 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 898 ierr = PetscFree(aij);CHKERRQ(ierr); 899 900 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 901 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 902 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 903 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 904 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 905 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 906 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 907 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 908 PetscFunctionReturn(0); 909 } 910 911 #undef __FUNCT__ 912 #define __FUNCT__ "MatView_MPIAIJ_Binary" 913 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 914 { 915 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 916 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 917 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 918 PetscErrorCode ierr; 919 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 920 int fd; 921 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 922 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 923 PetscScalar *column_values; 924 925 PetscFunctionBegin; 926 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 927 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 928 nz = 
A->nz + B->nz; 929 if (!rank) { 930 header[0] = MAT_FILE_COOKIE; 931 header[1] = mat->rmap->N; 932 header[2] = mat->cmap->N; 933 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 934 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 935 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 936 /* get largest number of rows any processor has */ 937 rlen = mat->rmap->n; 938 range = mat->rmap->range; 939 for (i=1; i<size; i++) { 940 rlen = PetscMax(rlen,range[i+1] - range[i]); 941 } 942 } else { 943 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 944 rlen = mat->rmap->n; 945 } 946 947 /* load up the local row counts */ 948 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 949 for (i=0; i<mat->rmap->n; i++) { 950 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 951 } 952 953 /* store the row lengths to the file */ 954 if (!rank) { 955 MPI_Status status; 956 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 957 for (i=1; i<size; i++) { 958 rlen = range[i+1] - range[i]; 959 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 960 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 961 } 962 } else { 963 ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 964 } 965 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 966 967 /* load up the local column indices */ 968 nzmax = nz; /* )th processor needs space a largest processor needs */ 969 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 970 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 971 cnt = 0; 972 for (i=0; i<mat->rmap->n; i++) { 973 for (j=B->i[i]; j<B->i[i+1]; j++) { 974 if ( (col = garray[B->j[j]]) > cstart) break; 975 column_indices[cnt++] = col; 976 } 977 for (k=A->i[i]; k<A->i[i+1]; k++) { 978 column_indices[cnt++] = A->j[k] + cstart; 979 } 980 for (; j<B->i[i+1]; j++) { 981 column_indices[cnt++] = garray[B->j[j]]; 982 } 983 } 984 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 985 986 /* store the column indices to the file */ 987 if (!rank) { 988 MPI_Status status; 989 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 990 for (i=1; i<size; i++) { 991 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 992 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 993 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 994 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 995 } 996 } else { 997 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 998 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 999 } 1000 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1001 1002 /* load up the local column values */ 1003 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1004 cnt = 0; 1005 for (i=0; i<mat->rmap->n; i++) { 1006 for (j=B->i[i]; j<B->i[i+1]; j++) { 1007 if ( garray[B->j[j]] > cstart) break; 1008 column_values[cnt++] = B->a[j]; 1009 } 1010 for (k=A->i[i]; k<A->i[i+1]; k++) { 1011 column_values[cnt++] = A->a[k]; 1012 } 1013 for (; 
j<B->i[i+1]; j++) { 1014 column_values[cnt++] = B->a[j]; 1015 } 1016 } 1017 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1018 1019 /* store the column values to the file */ 1020 if (!rank) { 1021 MPI_Status status; 1022 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1023 for (i=1; i<size; i++) { 1024 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1025 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1026 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1027 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1028 } 1029 } else { 1030 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1031 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1032 } 1033 ierr = PetscFree(column_values);CHKERRQ(ierr); 1034 PetscFunctionReturn(0); 1035 } 1036 1037 #undef __FUNCT__ 1038 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1039 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1040 { 1041 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1042 PetscErrorCode ierr; 1043 PetscMPIInt rank = aij->rank,size = aij->size; 1044 PetscTruth isdraw,iascii,isbinary; 1045 PetscViewer sviewer; 1046 PetscViewerFormat format; 1047 1048 PetscFunctionBegin; 1049 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1050 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1051 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1052 if (iascii) { 1053 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1054 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1055 MatInfo info; 1056 PetscTruth inodes; 1057 1058 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1059 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1060 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1061 if (!inodes) { 1062 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1063 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1064 } else { 1065 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1066 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1067 } 1068 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1069 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1070 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1071 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1072 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1073 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1074 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1075 PetscFunctionReturn(0); 1076 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1077 PetscInt inodecount,inodelimit,*inodes; 1078 ierr = 
MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1079 if (inodes) { 1080 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1081 } else { 1082 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1083 } 1084 PetscFunctionReturn(0); 1085 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1086 PetscFunctionReturn(0); 1087 } 1088 } else if (isbinary) { 1089 if (size == 1) { 1090 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1091 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1092 } else { 1093 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1094 } 1095 PetscFunctionReturn(0); 1096 } else if (isdraw) { 1097 PetscDraw draw; 1098 PetscTruth isnull; 1099 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1100 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1101 } 1102 1103 if (size == 1) { 1104 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1105 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1106 } else { 1107 /* assemble the entire matrix onto first processor. */ 1108 Mat A; 1109 Mat_SeqAIJ *Aloc; 1110 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1111 MatScalar *a; 1112 1113 if (mat->rmap->N > 1024) { 1114 PetscTruth flg = PETSC_FALSE; 1115 1116 ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1117 if (!flg) { 1118 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1119 } 1120 } 1121 1122 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1123 if (!rank) { 1124 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1125 } else { 1126 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1127 } 1128 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1129 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1130 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1131 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1132 1133 /* copy over the A part */ 1134 Aloc = (Mat_SeqAIJ*)aij->A->data; 1135 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1136 row = mat->rmap->rstart; 1137 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1138 for (i=0; i<m; i++) { 1139 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1140 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1141 } 1142 aj = Aloc->j; 1143 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1144 1145 /* copy over the B part */ 1146 Aloc = (Mat_SeqAIJ*)aij->B->data; 1147 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1148 row = mat->rmap->rstart; 1149 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1150 ct = cols; 1151 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1152 for (i=0; i<m; i++) { 1153 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1154 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1155 } 1156 ierr = PetscFree(ct);CHKERRQ(ierr); 1157 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1158 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1159 /* 1160 Everyone has to call to draw the matrix since the 
graphics waits are 1161 synchronized across all processors that share the PetscDraw object 1162 */ 1163 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1164 if (!rank) { 1165 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1166 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1167 } 1168 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1169 ierr = MatDestroy(A);CHKERRQ(ierr); 1170 } 1171 PetscFunctionReturn(0); 1172 } 1173 1174 #undef __FUNCT__ 1175 #define __FUNCT__ "MatView_MPIAIJ" 1176 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1177 { 1178 PetscErrorCode ierr; 1179 PetscTruth iascii,isdraw,issocket,isbinary; 1180 1181 PetscFunctionBegin; 1182 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1183 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1184 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1185 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1186 if (iascii || isdraw || isbinary || issocket) { 1187 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1188 } else { 1189 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1190 } 1191 PetscFunctionReturn(0); 1192 } 1193 1194 #undef __FUNCT__ 1195 #define __FUNCT__ "MatRelax_MPIAIJ" 1196 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1197 { 1198 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1199 PetscErrorCode ierr; 1200 Vec bb1; 1201 1202 PetscFunctionBegin; 1203 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1204 1205 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1206 if (flag & SOR_ZERO_INITIAL_GUESS) { 1207 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1208 its--; 1209 } 1210 1211 while (its--) { 1212 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1213 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1214 1215 /* update rhs: bb1 = bb - B*x */ 1216 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1217 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1218 1219 /* local sweep */ 1220 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 1221 } 1222 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1223 if (flag & SOR_ZERO_INITIAL_GUESS) { 1224 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1225 its--; 1226 } 1227 while (its--) { 1228 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1229 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1230 1231 /* update rhs: bb1 = bb - B*x */ 1232 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1233 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1234 1235 /* local sweep */ 1236 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1237 } 1238 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1239 if (flag & SOR_ZERO_INITIAL_GUESS) { 1240 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1241 its--; 1242 } 1243 while 
(its--) { 1244 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1245 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1246 1247 /* update rhs: bb1 = bb - B*x */ 1248 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1249 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1250 1251 /* local sweep */ 1252 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1253 } 1254 } else { 1255 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1256 } 1257 1258 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1259 PetscFunctionReturn(0); 1260 } 1261 1262 #undef __FUNCT__ 1263 #define __FUNCT__ "MatPermute_MPIAIJ" 1264 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1265 { 1266 MPI_Comm comm,pcomm; 1267 PetscInt first,local_size,nrows; 1268 const PetscInt *rows; 1269 PetscMPIInt size; 1270 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1271 PetscErrorCode ierr; 1272 1273 PetscFunctionBegin; 1274 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1275 /* make a collective version of 'rowp' */ 1276 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1277 if (pcomm==comm) { 1278 crowp = rowp; 1279 } else { 1280 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1281 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1282 ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr); 1283 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1284 } 1285 /* collect the global row permutation and invert it */ 1286 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1287 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1288 if (pcomm!=comm) { 1289 ierr = ISDestroy(crowp);CHKERRQ(ierr); 1290 } 1291 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1292 /* get the local target indices */ 1293 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1294 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 1295 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1296 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr); 1297 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1298 ierr = ISDestroy(irowp);CHKERRQ(ierr); 1299 /* the column permutation is so much easier; 1300 make a local version of 'colp' and invert it */ 1301 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 1302 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 1303 if (size==1) { 1304 lcolp = colp; 1305 } else { 1306 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 1307 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 1308 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr); 1309 } 1310 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1311 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1312 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 1313 if (size>1) { 1314 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 1315 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 1316 } 1317 /* now we just get the submatrix */ 1318 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1319 /* clean up */ 1320 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 1321 ierr = ISDestroy(icolp);CHKERRQ(ierr); 1322 PetscFunctionReturn(0); 1323 } 1324 1325 #undef __FUNCT__ 1326 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1327 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1328 { 1329 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1330 Mat A = mat->A,B = mat->B; 1331 
PetscErrorCode ierr; 1332 PetscReal isend[5],irecv[5]; 1333 1334 PetscFunctionBegin; 1335 info->block_size = 1.0; 1336 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1337 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1338 isend[3] = info->memory; isend[4] = info->mallocs; 1339 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1340 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1341 isend[3] += info->memory; isend[4] += info->mallocs; 1342 if (flag == MAT_LOCAL) { 1343 info->nz_used = isend[0]; 1344 info->nz_allocated = isend[1]; 1345 info->nz_unneeded = isend[2]; 1346 info->memory = isend[3]; 1347 info->mallocs = isend[4]; 1348 } else if (flag == MAT_GLOBAL_MAX) { 1349 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1350 info->nz_used = irecv[0]; 1351 info->nz_allocated = irecv[1]; 1352 info->nz_unneeded = irecv[2]; 1353 info->memory = irecv[3]; 1354 info->mallocs = irecv[4]; 1355 } else if (flag == MAT_GLOBAL_SUM) { 1356 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1357 info->nz_used = irecv[0]; 1358 info->nz_allocated = irecv[1]; 1359 info->nz_unneeded = irecv[2]; 1360 info->memory = irecv[3]; 1361 info->mallocs = irecv[4]; 1362 } 1363 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1364 info->fill_ratio_needed = 0; 1365 info->factor_mallocs = 0; 1366 1367 PetscFunctionReturn(0); 1368 } 1369 1370 #undef __FUNCT__ 1371 #define __FUNCT__ "MatSetOption_MPIAIJ" 1372 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg) 1373 { 1374 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1375 PetscErrorCode ierr; 1376 1377 PetscFunctionBegin; 1378 switch (op) { 1379 case MAT_NEW_NONZERO_LOCATIONS: 1380 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1381 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1382 case MAT_KEEP_NONZERO_PATTERN: 1383 case MAT_NEW_NONZERO_LOCATION_ERR: 1384 case MAT_USE_INODES: 1385 case MAT_IGNORE_ZERO_ENTRIES: 1386 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1387 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1388 break; 1389 case MAT_ROW_ORIENTED: 1390 a->roworiented = flg; 1391 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1392 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1393 break; 1394 case MAT_NEW_DIAGONALS: 1395 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1396 break; 1397 case MAT_IGNORE_OFF_PROC_ENTRIES: 1398 a->donotstash = PETSC_TRUE; 1399 break; 1400 case MAT_SYMMETRIC: 1401 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1402 break; 1403 case MAT_STRUCTURALLY_SYMMETRIC: 1404 case MAT_HERMITIAN: 1405 case MAT_SYMMETRY_ETERNAL: 1406 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1407 break; 1408 default: 1409 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1410 } 1411 PetscFunctionReturn(0); 1412 } 1413 1414 #undef __FUNCT__ 1415 #define __FUNCT__ "MatGetRow_MPIAIJ" 1416 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1417 { 1418 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1419 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1420 PetscErrorCode ierr; 1421 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1422 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1423 PetscInt *cmap,*idx_p; 1424 1425 PetscFunctionBegin; 1426 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1427 mat->getrowactive = PETSC_TRUE; 1428 1429 if 
(!mat->rowvalues && (idx || v)) { 1430 /* 1431 allocate enough space to hold information from the longest row. 1432 */ 1433 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1434 PetscInt max = 1,tmp; 1435 for (i=0; i<matin->rmap->n; i++) { 1436 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1437 if (max < tmp) { max = tmp; } 1438 } 1439 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1440 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1441 } 1442 1443 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1444 lrow = row - rstart; 1445 1446 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1447 if (!v) {pvA = 0; pvB = 0;} 1448 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1449 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1450 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1451 nztot = nzA + nzB; 1452 1453 cmap = mat->garray; 1454 if (v || idx) { 1455 if (nztot) { 1456 /* Sort by increasing column numbers, assuming A and B already sorted */ 1457 PetscInt imark = -1; 1458 if (v) { 1459 *v = v_p = mat->rowvalues; 1460 for (i=0; i<nzB; i++) { 1461 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1462 else break; 1463 } 1464 imark = i; 1465 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1466 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1467 } 1468 if (idx) { 1469 *idx = idx_p = mat->rowindices; 1470 if (imark > -1) { 1471 for (i=0; i<imark; i++) { 1472 idx_p[i] = cmap[cworkB[i]]; 1473 } 1474 } else { 1475 for (i=0; i<nzB; i++) { 1476 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1477 else break; 1478 } 1479 imark = i; 1480 } 1481 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1482 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1483 } 1484 } else { 1485 if (idx) *idx = 0; 1486 if (v) *v = 0; 1487 } 1488 } 1489 *nz = nztot; 1490 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1491 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1492 PetscFunctionReturn(0); 1493 } 1494 1495 #undef __FUNCT__ 1496 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1497 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1498 { 1499 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1500 1501 PetscFunctionBegin; 1502 if (!aij->getrowactive) { 1503 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1504 } 1505 aij->getrowactive = PETSC_FALSE; 1506 PetscFunctionReturn(0); 1507 } 1508 1509 #undef __FUNCT__ 1510 #define __FUNCT__ "MatNorm_MPIAIJ" 1511 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1512 { 1513 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1514 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1515 PetscErrorCode ierr; 1516 PetscInt i,j,cstart = mat->cmap->rstart; 1517 PetscReal sum = 0.0; 1518 MatScalar *v; 1519 1520 PetscFunctionBegin; 1521 if (aij->size == 1) { 1522 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1523 } else { 1524 if (type == NORM_FROBENIUS) { 1525 v = amat->a; 1526 for (i=0; i<amat->nz; i++) { 1527 #if defined(PETSC_USE_COMPLEX) 1528 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1529 #else 1530 sum += (*v)*(*v); v++; 1531 #endif 1532 } 1533 v = bmat->a; 1534 for (i=0; i<bmat->nz; i++) { 1535 #if defined(PETSC_USE_COMPLEX) 1536 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1537 #else 1538 sum += (*v)*(*v); v++; 1539 #endif 1540 } 
1541 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1542 *norm = sqrt(*norm); 1543 } else if (type == NORM_1) { /* max column norm */ 1544 PetscReal *tmp,*tmp2; 1545 PetscInt *jj,*garray = aij->garray; 1546 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1547 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1548 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1549 *norm = 0.0; 1550 v = amat->a; jj = amat->j; 1551 for (j=0; j<amat->nz; j++) { 1552 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1553 } 1554 v = bmat->a; jj = bmat->j; 1555 for (j=0; j<bmat->nz; j++) { 1556 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1557 } 1558 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1559 for (j=0; j<mat->cmap->N; j++) { 1560 if (tmp2[j] > *norm) *norm = tmp2[j]; 1561 } 1562 ierr = PetscFree(tmp);CHKERRQ(ierr); 1563 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1564 } else if (type == NORM_INFINITY) { /* max row norm */ 1565 PetscReal ntemp = 0.0; 1566 for (j=0; j<aij->A->rmap->n; j++) { 1567 v = amat->a + amat->i[j]; 1568 sum = 0.0; 1569 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1570 sum += PetscAbsScalar(*v); v++; 1571 } 1572 v = bmat->a + bmat->i[j]; 1573 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1574 sum += PetscAbsScalar(*v); v++; 1575 } 1576 if (sum > ntemp) ntemp = sum; 1577 } 1578 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1579 } else { 1580 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1581 } 1582 } 1583 PetscFunctionReturn(0); 1584 } 1585 1586 #undef __FUNCT__ 1587 #define __FUNCT__ "MatTranspose_MPIAIJ" 1588 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1589 { 1590 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1591 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1592 PetscErrorCode ierr; 1593 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1594 PetscInt cstart=A->cmap->rstart,ncol; 1595 Mat B; 1596 MatScalar *array; 1597 1598 PetscFunctionBegin; 1599 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1600 1601 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1602 ai = Aloc->i; aj = Aloc->j; 1603 bi = Bloc->i; bj = Bloc->j; 1604 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1605 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1606 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1607 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1608 for (i=0; i<ai[ma]; i++){ 1609 d_nnz[aj[i]] ++; 1610 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1611 } 1612 1613 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1614 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1615 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1616 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1617 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1618 } else { 1619 B = *matout; 1620 } 1621 1622 /* copy over the A part */ 1623 array = Aloc->a; 1624 row = A->rmap->rstart; 1625 for (i=0; i<ma; i++) { 1626 ncol = ai[i+1]-ai[i]; 1627 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1628 row++; array += ncol; aj += ncol; 1629 } 1630 aj = Aloc->j; 1631 for (i=0; i<ai[ma]; i++) 
aj[i] -= cstart; /* resume local col index */ 1632 1633 /* copy over the B part */ 1634 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1635 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1636 array = Bloc->a; 1637 row = A->rmap->rstart; 1638 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1639 cols_tmp = cols; 1640 for (i=0; i<mb; i++) { 1641 ncol = bi[i+1]-bi[i]; 1642 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1643 row++; array += ncol; cols_tmp += ncol; 1644 } 1645 ierr = PetscFree(cols);CHKERRQ(ierr); 1646 1647 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1648 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1649 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1650 *matout = B; 1651 } else { 1652 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1653 } 1654 PetscFunctionReturn(0); 1655 } 1656 1657 #undef __FUNCT__ 1658 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1659 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1660 { 1661 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1662 Mat a = aij->A,b = aij->B; 1663 PetscErrorCode ierr; 1664 PetscInt s1,s2,s3; 1665 1666 PetscFunctionBegin; 1667 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1668 if (rr) { 1669 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1670 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1671 /* Overlap communication with computation. */ 1672 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1673 } 1674 if (ll) { 1675 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1676 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1677 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1678 } 1679 /* scale the diagonal block */ 1680 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1681 1682 if (rr) { 1683 /* Do a scatter end and then right scale the off-diagonal block */ 1684 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1685 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1686 } 1687 1688 PetscFunctionReturn(0); 1689 } 1690 1691 #undef __FUNCT__ 1692 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1693 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1694 { 1695 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1696 PetscErrorCode ierr; 1697 1698 PetscFunctionBegin; 1699 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1700 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1701 PetscFunctionReturn(0); 1702 } 1703 #undef __FUNCT__ 1704 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1705 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1706 { 1707 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1708 PetscErrorCode ierr; 1709 1710 PetscFunctionBegin; 1711 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1712 PetscFunctionReturn(0); 1713 } 1714 1715 #undef __FUNCT__ 1716 #define __FUNCT__ "MatEqual_MPIAIJ" 1717 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1718 { 1719 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1720 Mat a,b,c,d; 1721 PetscTruth flg; 1722 PetscErrorCode ierr; 1723 1724 PetscFunctionBegin; 1725 a = matA->A; b = matA->B; 1726 c = matB->A; d = matB->B; 1727 1728 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1729 if (flg) { 1730 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1731 } 1732 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1733 PetscFunctionReturn(0); 1734 } 1735 1736 #undef __FUNCT__ 1737 #define 
__FUNCT__ "MatCopy_MPIAIJ" 1738 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1739 { 1740 PetscErrorCode ierr; 1741 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1742 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1743 1744 PetscFunctionBegin; 1745 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1746 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1747 /* because of the column compression in the off-processor part of the matrix a->B, 1748 the number of columns in a->B and b->B may be different, hence we cannot call 1749 the MatCopy() directly on the two parts. If need be, we can provide a more 1750 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1751 then copying the submatrices */ 1752 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1753 } else { 1754 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1755 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1756 } 1757 PetscFunctionReturn(0); 1758 } 1759 1760 #undef __FUNCT__ 1761 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1762 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1763 { 1764 PetscErrorCode ierr; 1765 1766 PetscFunctionBegin; 1767 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1768 PetscFunctionReturn(0); 1769 } 1770 1771 #include "petscblaslapack.h" 1772 #undef __FUNCT__ 1773 #define __FUNCT__ "MatAXPY_MPIAIJ" 1774 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1775 { 1776 PetscErrorCode ierr; 1777 PetscInt i; 1778 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1779 PetscBLASInt bnz,one=1; 1780 Mat_SeqAIJ *x,*y; 1781 1782 PetscFunctionBegin; 1783 if (str == SAME_NONZERO_PATTERN) { 1784 PetscScalar alpha = a; 1785 x = (Mat_SeqAIJ *)xx->A->data; 1786 y = (Mat_SeqAIJ *)yy->A->data; 1787 bnz = PetscBLASIntCast(x->nz); 1788 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1789 x = (Mat_SeqAIJ *)xx->B->data; 1790 y = (Mat_SeqAIJ *)yy->B->data; 1791 bnz = PetscBLASIntCast(x->nz); 1792 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1793 } else if (str == SUBSET_NONZERO_PATTERN) { 1794 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1795 1796 x = (Mat_SeqAIJ *)xx->B->data; 1797 y = (Mat_SeqAIJ *)yy->B->data; 1798 if (y->xtoy && y->XtoY != xx->B) { 1799 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1800 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1801 } 1802 if (!y->xtoy) { /* get xtoy */ 1803 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1804 y->XtoY = xx->B; 1805 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1806 } 1807 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1808 } else { 1809 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1810 } 1811 PetscFunctionReturn(0); 1812 } 1813 1814 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1815 1816 #undef __FUNCT__ 1817 #define __FUNCT__ "MatConjugate_MPIAIJ" 1818 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1819 { 1820 #if defined(PETSC_USE_COMPLEX) 1821 PetscErrorCode ierr; 1822 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1823 1824 PetscFunctionBegin; 1825 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1826 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1827 #else 1828 PetscFunctionBegin; 1829 #endif 1830 PetscFunctionReturn(0); 1831 } 1832 1833 #undef __FUNCT__ 1834 #define __FUNCT__ "MatRealPart_MPIAIJ" 1835 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1836 { 1837 Mat_MPIAIJ *a = 
(Mat_MPIAIJ*)A->data; 1838 PetscErrorCode ierr; 1839 1840 PetscFunctionBegin; 1841 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1842 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1843 PetscFunctionReturn(0); 1844 } 1845 1846 #undef __FUNCT__ 1847 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1848 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1849 { 1850 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1851 PetscErrorCode ierr; 1852 1853 PetscFunctionBegin; 1854 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1855 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1856 PetscFunctionReturn(0); 1857 } 1858 1859 #ifdef PETSC_HAVE_PBGL 1860 1861 #include <boost/parallel/mpi/bsp_process_group.hpp> 1862 #include <boost/graph/distributed/ilu_default_graph.hpp> 1863 #include <boost/graph/distributed/ilu_0_block.hpp> 1864 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1865 #include <boost/graph/distributed/petsc/interface.hpp> 1866 #include <boost/multi_array.hpp> 1867 #include <boost/parallel/distributed_property_map->hpp> 1868 1869 #undef __FUNCT__ 1870 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1871 /* 1872 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1873 */ 1874 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1875 { 1876 namespace petsc = boost::distributed::petsc; 1877 1878 namespace graph_dist = boost::graph::distributed; 1879 using boost::graph::distributed::ilu_default::process_group_type; 1880 using boost::graph::ilu_permuted; 1881 1882 PetscTruth row_identity, col_identity; 1883 PetscContainer c; 1884 PetscInt m, n, M, N; 1885 PetscErrorCode ierr; 1886 1887 PetscFunctionBegin; 1888 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1889 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1890 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1891 if (!row_identity || !col_identity) { 1892 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1893 } 1894 1895 process_group_type pg; 1896 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1897 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1898 lgraph_type& level_graph = *lgraph_p; 1899 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1900 1901 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1902 ilu_permuted(level_graph); 1903 1904 /* put together the new matrix */ 1905 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 1906 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1907 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1908 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 1909 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 1910 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1911 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1912 1913 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 1914 ierr = PetscContainerSetPointer(c, lgraph_p); 1915 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 1916 PetscFunctionReturn(0); 1917 } 1918 1919 #undef __FUNCT__ 1920 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1921 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 1922 { 1923 PetscFunctionBegin; 1924 PetscFunctionReturn(0); 1925 } 1926 1927 #undef __FUNCT__ 1928 #define __FUNCT__ "MatSolve_MPIAIJ" 1929 /* 1930 This uses the parallel ILU 
factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1931 */ 1932 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1933 { 1934 namespace graph_dist = boost::graph::distributed; 1935 1936 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1937 lgraph_type* lgraph_p; 1938 PetscContainer c; 1939 PetscErrorCode ierr; 1940 1941 PetscFunctionBegin; 1942 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1943 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1944 ierr = VecCopy(b, x);CHKERRQ(ierr); 1945 1946 PetscScalar* array_x; 1947 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1948 PetscInt sx; 1949 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 1950 1951 PetscScalar* array_b; 1952 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 1953 PetscInt sb; 1954 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 1955 1956 lgraph_type& level_graph = *lgraph_p; 1957 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1958 1959 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 1960 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 1961 ref_x(array_x, boost::extents[num_vertices(graph)]); 1962 1963 typedef boost::iterator_property_map<array_ref_type::iterator, 1964 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 1965 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 1966 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 1967 1968 ilu_set_solve(*lgraph_p, vector_b, vector_x); 1969 1970 PetscFunctionReturn(0); 1971 } 1972 #endif 1973 1974 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 1975 PetscInt nzlocal,nsends,nrecvs; 1976 PetscMPIInt *send_rank; 1977 PetscInt *sbuf_nz,*sbuf_j,**rbuf_j; 1978 PetscScalar *sbuf_a,**rbuf_a; 1979 PetscErrorCode (*MatDestroy)(Mat); 1980 } Mat_Redundant; 1981 1982 #undef __FUNCT__ 1983 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 1984 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 1985 { 1986 PetscErrorCode ierr; 1987 Mat_Redundant *redund=(Mat_Redundant*)ptr; 1988 PetscInt i; 1989 1990 PetscFunctionBegin; 1991 ierr = PetscFree(redund->send_rank);CHKERRQ(ierr); 1992 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 1993 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 1994 for (i=0; i<redund->nrecvs; i++){ 1995 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 1996 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 1997 } 1998 ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 1999 ierr = PetscFree(redund);CHKERRQ(ierr); 2000 PetscFunctionReturn(0); 2001 } 2002 2003 #undef __FUNCT__ 2004 #define __FUNCT__ "MatDestroy_MatRedundant" 2005 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2006 { 2007 PetscErrorCode ierr; 2008 PetscContainer container; 2009 Mat_Redundant *redund=PETSC_NULL; 2010 2011 PetscFunctionBegin; 2012 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2013 if (container) { 2014 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2015 } else { 2016 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2017 } 2018 A->ops->destroy = redund->MatDestroy; 2019 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2020 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2021 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 2022 PetscFunctionReturn(0); 2023 } 2024 2025 #undef __FUNCT__ 2026 #define __FUNCT__ 
"MatGetRedundantMatrix_MPIAIJ" 2027 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2028 { 2029 PetscMPIInt rank,size; 2030 MPI_Comm comm=((PetscObject)mat)->comm; 2031 PetscErrorCode ierr; 2032 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2033 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2034 PetscInt *rowrange=mat->rmap->range; 2035 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2036 Mat A=aij->A,B=aij->B,C=*matredundant; 2037 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2038 PetscScalar *sbuf_a; 2039 PetscInt nzlocal=a->nz+b->nz; 2040 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2041 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2042 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2043 MatScalar *aworkA,*aworkB; 2044 PetscScalar *vals; 2045 PetscMPIInt tag1,tag2,tag3,imdex; 2046 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2047 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2048 MPI_Status recv_status,*send_status; 2049 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2050 PetscInt **rbuf_j=PETSC_NULL; 2051 PetscScalar **rbuf_a=PETSC_NULL; 2052 Mat_Redundant *redund=PETSC_NULL; 2053 PetscContainer container; 2054 2055 PetscFunctionBegin; 2056 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2057 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2058 2059 if (reuse == MAT_REUSE_MATRIX) { 2060 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2061 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2062 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2063 if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2064 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2065 if (container) { 2066 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2067 } else { 2068 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2069 } 2070 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2071 2072 nsends = redund->nsends; 2073 nrecvs = redund->nrecvs; 2074 send_rank = redund->send_rank; recv_rank = send_rank + size; 2075 sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends; 2076 sbuf_j = redund->sbuf_j; 2077 sbuf_a = redund->sbuf_a; 2078 rbuf_j = redund->rbuf_j; 2079 rbuf_a = redund->rbuf_a; 2080 } 2081 2082 if (reuse == MAT_INITIAL_MATRIX){ 2083 PetscMPIInt subrank,subsize; 2084 PetscInt nleftover,np_subcomm; 2085 /* get the destination processors' id send_rank, nsends and nrecvs */ 2086 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2087 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2088 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 2089 recv_rank = send_rank + size; 2090 np_subcomm = size/nsubcomm; 2091 nleftover = size - nsubcomm*np_subcomm; 2092 nsends = 0; nrecvs = 0; 2093 for (i=0; i<size; i++){ /* i=rank*/ 2094 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2095 send_rank[nsends] = i; nsends++; 2096 recv_rank[nrecvs++] = i; 2097 } 2098 } 2099 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2100 i = size-nleftover-1; 2101 j = 0; 2102 while (j < nsubcomm - nleftover){ 2103 send_rank[nsends++] = i; 2104 i--; j++; 2105 } 2106 } 2107 2108 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2109 for (i=0; i<nleftover; i++){ 2110 recv_rank[nrecvs++] = size-nleftover+i; 2111 } 2112 } 2113 2114 /* allocate sbuf_j, sbuf_a */ 2115 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2116 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2117 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2118 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2119 2120 /* copy mat's local entries into the buffers */ 2121 if (reuse == MAT_INITIAL_MATRIX){ 2122 rownz_max = 0; 2123 rptr = sbuf_j; 2124 cols = sbuf_j + rend-rstart + 1; 2125 vals = sbuf_a; 2126 rptr[0] = 0; 2127 for (i=0; i<rend-rstart; i++){ 2128 row = i + rstart; 2129 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2130 ncols = nzA + nzB; 2131 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2132 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2133 /* load the column indices for this row into cols */ 2134 lwrite = 0; 2135 for (l=0; l<nzB; l++) { 2136 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2137 vals[lwrite] = aworkB[l]; 2138 cols[lwrite++] = ctmp; 2139 } 2140 } 2141 for (l=0; l<nzA; l++){ 2142 vals[lwrite] = aworkA[l]; 2143 cols[lwrite++] = cstart + cworkA[l]; 2144 } 2145 for (l=0; l<nzB; l++) { 2146 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2147 vals[lwrite] = aworkB[l]; 2148 cols[lwrite++] = ctmp; 2149 } 2150 } 2151 vals += ncols; 2152 cols += ncols; 2153 rptr[i+1] = rptr[i] + ncols; 2154 if (rownz_max < ncols) rownz_max = ncols; 2155 } 2156 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2157 } else { /* only copy matrix values into sbuf_a */ 2158 rptr = sbuf_j; 2159 vals = sbuf_a; 2160 rptr[0] = 0; 2161 for (i=0; i<rend-rstart; i++){ 2162 row = i + rstart; 2163 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2164 ncols = nzA + nzB; 2165 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2166 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2167 lwrite = 0; 2168 for (l=0; l<nzB; l++) { 2169 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2170 } 2171 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2172 for (l=0; l<nzB; l++) { 
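        /* third pass: off-diagonal entries whose global column lies at or beyond cend.
           Together with the two passes above this writes each row's values in the same
           ascending global-column order used to build sbuf_j in the MAT_INITIAL_MATRIX
           branch, so the values stay aligned with the stored column indices. */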
2173 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2174 } 2175 vals += ncols; 2176 rptr[i+1] = rptr[i] + ncols; 2177 } 2178 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2179 2180 /* send nzlocal to others, and recv other's nzlocal */ 2181 /*--------------------------------------------------*/ 2182 if (reuse == MAT_INITIAL_MATRIX){ 2183 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2184 s_waits2 = s_waits3 + nsends; 2185 s_waits1 = s_waits2 + nsends; 2186 r_waits1 = s_waits1 + nsends; 2187 r_waits2 = r_waits1 + nrecvs; 2188 r_waits3 = r_waits2 + nrecvs; 2189 } else { 2190 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2191 r_waits3 = s_waits3 + nsends; 2192 } 2193 2194 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2195 if (reuse == MAT_INITIAL_MATRIX){ 2196 /* get new tags to keep the communication clean */ 2197 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2198 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2199 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2200 rbuf_nz = sbuf_nz + nsends; 2201 2202 /* post receives of other's nzlocal */ 2203 for (i=0; i<nrecvs; i++){ 2204 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2205 } 2206 /* send nzlocal to others */ 2207 for (i=0; i<nsends; i++){ 2208 sbuf_nz[i] = nzlocal; 2209 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2210 } 2211 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2212 count = nrecvs; 2213 while (count) { 2214 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2215 recv_rank[imdex] = recv_status.MPI_SOURCE; 2216 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2217 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2218 2219 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2220 rbuf_nz[imdex] += i + 2; 2221 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2222 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2223 count--; 2224 } 2225 /* wait on sends of nzlocal */ 2226 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2227 /* send mat->i,j to others, and recv from other's */ 2228 /*------------------------------------------------*/ 2229 for (i=0; i<nsends; i++){ 2230 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2231 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2232 } 2233 /* wait on receives of mat->i,j */ 2234 /*------------------------------*/ 2235 count = nrecvs; 2236 while (count) { 2237 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2238 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2239 count--; 2240 } 2241 /* wait on sends of mat->i,j */ 2242 /*---------------------------*/ 2243 if (nsends) { 2244 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2245 } 2246 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2247 2248 /* post receives, send and receive mat->a */ 2249 /*----------------------------------------*/ 
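  /* The column-index exchange above is done only when reuse == MAT_INITIAL_MATRIX;
     the numerical values in sbuf_a are exchanged on every call, so a MAT_REUSE_MATRIX
     call simply refreshes the entries of the previously created redundant matrix. */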
2250 for (imdex=0; imdex<nrecvs; imdex++) { 2251 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2252 } 2253 for (i=0; i<nsends; i++){ 2254 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2255 } 2256 count = nrecvs; 2257 while (count) { 2258 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2259 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2260 count--; 2261 } 2262 if (nsends) { 2263 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2264 } 2265 2266 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2267 2268 /* create redundant matrix */ 2269 /*-------------------------*/ 2270 if (reuse == MAT_INITIAL_MATRIX){ 2271 /* compute rownz_max for preallocation */ 2272 for (imdex=0; imdex<nrecvs; imdex++){ 2273 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2274 rptr = rbuf_j[imdex]; 2275 for (i=0; i<j; i++){ 2276 ncols = rptr[i+1] - rptr[i]; 2277 if (rownz_max < ncols) rownz_max = ncols; 2278 } 2279 } 2280 2281 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2282 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2283 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2284 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2285 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2286 } else { 2287 C = *matredundant; 2288 } 2289 2290 /* insert local matrix entries */ 2291 rptr = sbuf_j; 2292 cols = sbuf_j + rend-rstart + 1; 2293 vals = sbuf_a; 2294 for (i=0; i<rend-rstart; i++){ 2295 row = i + rstart; 2296 ncols = rptr[i+1] - rptr[i]; 2297 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2298 vals += ncols; 2299 cols += ncols; 2300 } 2301 /* insert received matrix entries */ 2302 for (imdex=0; imdex<nrecvs; imdex++){ 2303 rstart = rowrange[recv_rank[imdex]]; 2304 rend = rowrange[recv_rank[imdex]+1]; 2305 rptr = rbuf_j[imdex]; 2306 cols = rbuf_j[imdex] + rend-rstart + 1; 2307 vals = rbuf_a[imdex]; 2308 for (i=0; i<rend-rstart; i++){ 2309 row = i + rstart; 2310 ncols = rptr[i+1] - rptr[i]; 2311 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2312 vals += ncols; 2313 cols += ncols; 2314 } 2315 } 2316 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2317 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2318 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2319 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2320 if (reuse == MAT_INITIAL_MATRIX){ 2321 PetscContainer container; 2322 *matredundant = C; 2323 /* create a supporting struct and attach it to C for reuse */ 2324 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2325 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2326 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2327 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2328 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2329 2330 redund->nzlocal = nzlocal; 2331 redund->nsends = nsends; 2332 redund->nrecvs = nrecvs; 2333 redund->send_rank = send_rank; 2334 redund->sbuf_nz = sbuf_nz; 2335 redund->sbuf_j = sbuf_j; 2336 redund->sbuf_a = sbuf_a; 2337 redund->rbuf_j = 
rbuf_j; 2338 redund->rbuf_a = rbuf_a; 2339 2340 redund->MatDestroy = C->ops->destroy; 2341 C->ops->destroy = MatDestroy_MatRedundant; 2342 } 2343 PetscFunctionReturn(0); 2344 } 2345 2346 #undef __FUNCT__ 2347 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2348 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2349 { 2350 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2351 PetscErrorCode ierr; 2352 PetscInt i,*idxb = 0; 2353 PetscScalar *va,*vb; 2354 Vec vtmp; 2355 2356 PetscFunctionBegin; 2357 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2358 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2359 if (idx) { 2360 for (i=0; i<A->rmap->n; i++) { 2361 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2362 } 2363 } 2364 2365 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2366 if (idx) { 2367 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2368 } 2369 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2370 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2371 2372 for (i=0; i<A->rmap->n; i++){ 2373 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2374 va[i] = vb[i]; 2375 if (idx) idx[i] = a->garray[idxb[i]]; 2376 } 2377 } 2378 2379 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2380 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2381 if (idxb) { 2382 ierr = PetscFree(idxb);CHKERRQ(ierr); 2383 } 2384 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2385 PetscFunctionReturn(0); 2386 } 2387 2388 #undef __FUNCT__ 2389 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2390 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2391 { 2392 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2393 PetscErrorCode ierr; 2394 PetscInt i,*idxb = 0; 2395 PetscScalar *va,*vb; 2396 Vec vtmp; 2397 2398 PetscFunctionBegin; 2399 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2400 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2401 if (idx) { 2402 for (i=0; i<A->cmap->n; i++) { 2403 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2404 } 2405 } 2406 2407 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2408 if (idx) { 2409 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2410 } 2411 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2412 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2413 2414 for (i=0; i<A->rmap->n; i++){ 2415 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2416 va[i] = vb[i]; 2417 if (idx) idx[i] = a->garray[idxb[i]]; 2418 } 2419 } 2420 2421 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2422 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2423 if (idxb) { 2424 ierr = PetscFree(idxb);CHKERRQ(ierr); 2425 } 2426 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2427 PetscFunctionReturn(0); 2428 } 2429 2430 #undef __FUNCT__ 2431 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2432 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2433 { 2434 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2435 PetscInt n = A->rmap->n; 2436 PetscInt cstart = A->cmap->rstart; 2437 PetscInt *cmap = mat->garray; 2438 PetscInt *diagIdx, *offdiagIdx; 2439 Vec diagV, offdiagV; 2440 PetscScalar *a, *diagA, *offdiagA; 2441 PetscInt r; 2442 PetscErrorCode ierr; 2443 2444 PetscFunctionBegin; 2445 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2446 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2447 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2448 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2449 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2450 
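  /* Merge the per-row results of the diagonal (A) and off-diagonal (B) blocks entrywise below;
     the winning local column index is translated to a global column index, using the cstart
     offset for the A block and the garray (cmap) lookup for the B block. Note that the
     comparison uses PetscAbsScalar, i.e. entries are compared by magnitude. */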
ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2451 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2452 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2453 for(r = 0; r < n; ++r) { 2454 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2455 a[r] = diagA[r]; 2456 idx[r] = cstart + diagIdx[r]; 2457 } else { 2458 a[r] = offdiagA[r]; 2459 idx[r] = cmap[offdiagIdx[r]]; 2460 } 2461 } 2462 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2463 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2464 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2465 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2466 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2467 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2468 PetscFunctionReturn(0); 2469 } 2470 2471 #undef __FUNCT__ 2472 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2473 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2474 { 2475 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2476 PetscInt n = A->rmap->n; 2477 PetscInt cstart = A->cmap->rstart; 2478 PetscInt *cmap = mat->garray; 2479 PetscInt *diagIdx, *offdiagIdx; 2480 Vec diagV, offdiagV; 2481 PetscScalar *a, *diagA, *offdiagA; 2482 PetscInt r; 2483 PetscErrorCode ierr; 2484 2485 PetscFunctionBegin; 2486 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2487 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2488 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2489 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2490 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2491 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2492 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2493 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2494 for(r = 0; r < n; ++r) { 2495 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2496 a[r] = diagA[r]; 2497 idx[r] = cstart + diagIdx[r]; 2498 } else { 2499 a[r] = offdiagA[r]; 2500 idx[r] = cmap[offdiagIdx[r]]; 2501 } 2502 } 2503 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2504 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2505 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2506 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2507 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2508 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2509 PetscFunctionReturn(0); 2510 } 2511 2512 #undef __FUNCT__ 2513 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2514 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[]) 2515 { 2516 PetscErrorCode ierr; 2517 2518 PetscFunctionBegin; 2519 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 2520 PetscFunctionReturn(0); 2521 } 2522 2523 /* -------------------------------------------------------------------*/ 2524 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2525 MatGetRow_MPIAIJ, 2526 MatRestoreRow_MPIAIJ, 2527 MatMult_MPIAIJ, 2528 /* 4*/ MatMultAdd_MPIAIJ, 2529 MatMultTranspose_MPIAIJ, 2530 MatMultTransposeAdd_MPIAIJ, 2531 #ifdef PETSC_HAVE_PBGL 2532 MatSolve_MPIAIJ, 2533 #else 2534 0, 2535 #endif 2536 0, 2537 0, 2538 /*10*/ 0, 2539 0, 2540 0, 2541 MatRelax_MPIAIJ, 2542 MatTranspose_MPIAIJ, 2543 /*15*/ MatGetInfo_MPIAIJ, 2544 MatEqual_MPIAIJ, 2545 MatGetDiagonal_MPIAIJ, 2546 MatDiagonalScale_MPIAIJ, 2547 MatNorm_MPIAIJ, 2548 /*20*/ MatAssemblyBegin_MPIAIJ, 2549 MatAssemblyEnd_MPIAIJ, 2550 MatSetOption_MPIAIJ, 2551 MatZeroEntries_MPIAIJ, 2552 /*24*/ MatZeroRows_MPIAIJ, 2553 0, 2554 #ifdef PETSC_HAVE_PBGL 2555 0, 
2556 #else 2557 0, 2558 #endif 2559 0, 2560 0, 2561 /*29*/ MatSetUpPreallocation_MPIAIJ, 2562 #ifdef PETSC_HAVE_PBGL 2563 0, 2564 #else 2565 0, 2566 #endif 2567 0, 2568 0, 2569 0, 2570 /*34*/ MatDuplicate_MPIAIJ, 2571 0, 2572 0, 2573 0, 2574 0, 2575 /*39*/ MatAXPY_MPIAIJ, 2576 MatGetSubMatrices_MPIAIJ, 2577 MatIncreaseOverlap_MPIAIJ, 2578 MatGetValues_MPIAIJ, 2579 MatCopy_MPIAIJ, 2580 /*44*/ MatGetRowMax_MPIAIJ, 2581 MatScale_MPIAIJ, 2582 0, 2583 0, 2584 0, 2585 /*49*/ MatSetBlockSize_MPIAIJ, 2586 0, 2587 0, 2588 0, 2589 0, 2590 /*54*/ MatFDColoringCreate_MPIAIJ, 2591 0, 2592 MatSetUnfactored_MPIAIJ, 2593 MatPermute_MPIAIJ, 2594 0, 2595 /*59*/ MatGetSubMatrix_MPIAIJ, 2596 MatDestroy_MPIAIJ, 2597 MatView_MPIAIJ, 2598 0, 2599 0, 2600 /*64*/ 0, 2601 0, 2602 0, 2603 0, 2604 0, 2605 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2606 MatGetRowMinAbs_MPIAIJ, 2607 0, 2608 MatSetColoring_MPIAIJ, 2609 #if defined(PETSC_HAVE_ADIC) 2610 MatSetValuesAdic_MPIAIJ, 2611 #else 2612 0, 2613 #endif 2614 MatSetValuesAdifor_MPIAIJ, 2615 /*75*/ 0, 2616 0, 2617 0, 2618 0, 2619 0, 2620 /*80*/ 0, 2621 0, 2622 0, 2623 /*83*/ MatLoad_MPIAIJ, 2624 0, 2625 0, 2626 0, 2627 0, 2628 0, 2629 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2630 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2631 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2632 MatPtAP_Basic, 2633 MatPtAPSymbolic_MPIAIJ, 2634 /*94*/ MatPtAPNumeric_MPIAIJ, 2635 0, 2636 0, 2637 0, 2638 0, 2639 /*99*/ 0, 2640 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2641 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2642 MatConjugate_MPIAIJ, 2643 0, 2644 /*104*/MatSetValuesRow_MPIAIJ, 2645 MatRealPart_MPIAIJ, 2646 MatImaginaryPart_MPIAIJ, 2647 0, 2648 0, 2649 /*109*/0, 2650 MatGetRedundantMatrix_MPIAIJ, 2651 MatGetRowMin_MPIAIJ, 2652 0, 2653 0, 2654 /*114*/MatGetSeqNonzerostructure_MPIAIJ}; 2655 2656 /* ----------------------------------------------------------------------------------------*/ 2657 2658 EXTERN_C_BEGIN 2659 #undef __FUNCT__ 2660 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2661 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2662 { 2663 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2664 PetscErrorCode ierr; 2665 2666 PetscFunctionBegin; 2667 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2668 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2669 PetscFunctionReturn(0); 2670 } 2671 EXTERN_C_END 2672 2673 EXTERN_C_BEGIN 2674 #undef __FUNCT__ 2675 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2676 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2677 { 2678 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2679 PetscErrorCode ierr; 2680 2681 PetscFunctionBegin; 2682 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2683 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2684 PetscFunctionReturn(0); 2685 } 2686 EXTERN_C_END 2687 2688 #include "petscpc.h" 2689 EXTERN_C_BEGIN 2690 #undef __FUNCT__ 2691 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2692 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2693 { 2694 Mat_MPIAIJ *b; 2695 PetscErrorCode ierr; 2696 PetscInt i; 2697 2698 PetscFunctionBegin; 2699 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2700 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2701 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2702 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2703 2704 ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr); 2705 ierr = 
PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr); 2706 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 2707 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 2708 if (d_nnz) { 2709 for (i=0; i<B->rmap->n; i++) { 2710 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2711 } 2712 } 2713 if (o_nnz) { 2714 for (i=0; i<B->rmap->n; i++) { 2715 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2716 } 2717 } 2718 b = (Mat_MPIAIJ*)B->data; 2719 2720 if (!B->preallocated) { 2721 /* Explicitly create 2 MATSEQAIJ matrices. */ 2722 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2723 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2724 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2725 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2726 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2727 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2728 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2729 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2730 } 2731 2732 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2733 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2734 B->preallocated = PETSC_TRUE; 2735 PetscFunctionReturn(0); 2736 } 2737 EXTERN_C_END 2738 2739 #undef __FUNCT__ 2740 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2741 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2742 { 2743 Mat mat; 2744 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2745 PetscErrorCode ierr; 2746 2747 PetscFunctionBegin; 2748 *newmat = 0; 2749 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2750 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2751 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2752 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2753 a = (Mat_MPIAIJ*)mat->data; 2754 2755 mat->factor = matin->factor; 2756 mat->rmap->bs = matin->rmap->bs; 2757 mat->assembled = PETSC_TRUE; 2758 mat->insertmode = NOT_SET_VALUES; 2759 mat->preallocated = PETSC_TRUE; 2760 2761 a->size = oldmat->size; 2762 a->rank = oldmat->rank; 2763 a->donotstash = oldmat->donotstash; 2764 a->roworiented = oldmat->roworiented; 2765 a->rowindices = 0; 2766 a->rowvalues = 0; 2767 a->getrowactive = PETSC_FALSE; 2768 2769 ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr); 2770 ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr); 2771 2772 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr); 2773 if (oldmat->colmap) { 2774 #if defined (PETSC_USE_CTABLE) 2775 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2776 #else 2777 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2778 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2779 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2780 #endif 2781 } else a->colmap = 0; 2782 if (oldmat->garray) { 2783 PetscInt len; 2784 len = oldmat->B->cmap->n; 2785 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2786 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2787 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 
} 2788 } else a->garray = 0; 2789 2790 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2791 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2792 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2793 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2794 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2795 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2796 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2797 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2798 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2799 *newmat = mat; 2800 PetscFunctionReturn(0); 2801 } 2802 2803 #include "petscsys.h" 2804 2805 #undef __FUNCT__ 2806 #define __FUNCT__ "MatLoad_MPIAIJ" 2807 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 2808 { 2809 Mat A; 2810 PetscScalar *vals,*svals; 2811 MPI_Comm comm = ((PetscObject)viewer)->comm; 2812 MPI_Status status; 2813 PetscErrorCode ierr; 2814 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz; 2815 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2816 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2817 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2818 PetscInt cend,cstart,n,*rowners; 2819 int fd; 2820 2821 PetscFunctionBegin; 2822 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2823 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2824 if (!rank) { 2825 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2826 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2827 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2828 } 2829 2830 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2831 M = header[1]; N = header[2]; 2832 /* determine ownership of all rows */ 2833 m = M/size + ((M % size) > rank); 2834 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2835 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2836 2837 /* First process needs enough room for process with most rows */ 2838 if (!rank) { 2839 mmax = rowners[1]; 2840 for (i=2; i<size; i++) { 2841 mmax = PetscMax(mmax,rowners[i]); 2842 } 2843 } else mmax = m; 2844 2845 rowners[0] = 0; 2846 for (i=2; i<=size; i++) { 2847 rowners[i] += rowners[i-1]; 2848 } 2849 rstart = rowners[rank]; 2850 rend = rowners[rank+1]; 2851 2852 /* distribute row lengths to all processors */ 2853 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2854 if (!rank) { 2855 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2856 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2857 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2858 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2859 for (j=0; j<m; j++) { 2860 procsnz[0] += ourlens[j]; 2861 } 2862 for (i=1; i<size; i++) { 2863 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2864 /* calculate the number of nonzeros on each processor */ 2865 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2866 procsnz[i] += rowlengths[j]; 2867 } 2868 mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]); 2869 ierr = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2870 } 2871 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2872 } else { 2873 mpicnt = PetscMPIIntCast(m);CHKERRQ(ierr); 2874 ierr = 
MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2875 } 2876 2877 if (!rank) { 2878 /* determine max buffer needed and allocate it */ 2879 maxnz = 0; 2880 for (i=0; i<size; i++) { 2881 maxnz = PetscMax(maxnz,procsnz[i]); 2882 } 2883 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2884 2885 /* read in my part of the matrix column indices */ 2886 nz = procsnz[0]; 2887 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2888 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2889 2890 /* read in every one elses and ship off */ 2891 for (i=1; i<size; i++) { 2892 nz = procsnz[i]; 2893 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2894 mpicnt = PetscMPIIntCast(nz); 2895 ierr = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2896 } 2897 ierr = PetscFree(cols);CHKERRQ(ierr); 2898 } else { 2899 /* determine buffer space needed for message */ 2900 nz = 0; 2901 for (i=0; i<m; i++) { 2902 nz += ourlens[i]; 2903 } 2904 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2905 2906 /* receive message of column indices*/ 2907 mpicnt = PetscMPIIntCast(nz);CHKERRQ(ierr); 2908 ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2909 ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr); 2910 if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 2911 else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);} 2912 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 2913 } 2914 2915 /* determine column ownership if matrix is not square */ 2916 if (N != M) { 2917 n = N/size + ((N % size) > rank); 2918 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2919 cstart = cend - n; 2920 } else { 2921 cstart = rstart; 2922 cend = rend; 2923 n = cend - cstart; 2924 } 2925 2926 /* loop over local rows, determining number of off diagonal entries */ 2927 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2928 jj = 0; 2929 for (i=0; i<m; i++) { 2930 for (j=0; j<ourlens[i]; j++) { 2931 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2932 jj++; 2933 } 2934 } 2935 2936 /* create our matrix */ 2937 for (i=0; i<m; i++) { 2938 ourlens[i] -= offlens[i]; 2939 } 2940 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2941 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2942 ierr = MatSetType(A,type);CHKERRQ(ierr); 2943 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2944 2945 for (i=0; i<m; i++) { 2946 ourlens[i] += offlens[i]; 2947 } 2948 2949 if (!rank) { 2950 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2951 2952 /* read in my part of the matrix numerical values */ 2953 nz = procsnz[0]; 2954 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2955 2956 /* insert into matrix */ 2957 jj = rstart; 2958 smycols = mycols; 2959 svals = vals; 2960 for (i=0; i<m; i++) { 2961 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2962 smycols += ourlens[i]; 2963 svals += ourlens[i]; 2964 jj++; 2965 } 2966 2967 /* read in other processors and ship out */ 2968 for (i=1; i<size; i++) { 2969 nz = procsnz[i]; 2970 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2971 mpicnt = PetscMPIIntCast(nz); 2972 ierr = 
MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 2973 } 2974 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2975 } else { 2976 /* receive numeric values */ 2977 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2978 2979 /* receive message of values*/ 2980 mpicnt = PetscMPIIntCast(nz); 2981 ierr = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 2982 ierr = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr); 2983 if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 2984 else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);} 2985 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 2986 2987 /* insert into matrix */ 2988 jj = rstart; 2989 smycols = mycols; 2990 svals = vals; 2991 for (i=0; i<m; i++) { 2992 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2993 smycols += ourlens[i]; 2994 svals += ourlens[i]; 2995 jj++; 2996 } 2997 } 2998 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2999 ierr = PetscFree(vals);CHKERRQ(ierr); 3000 ierr = PetscFree(mycols);CHKERRQ(ierr); 3001 ierr = PetscFree(rowners);CHKERRQ(ierr); 3002 3003 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3004 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3005 *newmat = A; 3006 PetscFunctionReturn(0); 3007 } 3008 3009 #undef __FUNCT__ 3010 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3011 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3012 { 3013 PetscErrorCode ierr; 3014 IS iscol_local; 3015 PetscInt csize; 3016 3017 PetscFunctionBegin; 3018 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3019 if (call == MAT_REUSE_MATRIX) { 3020 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3021 if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3022 } else { 3023 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3024 } 3025 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3026 if (call == MAT_INITIAL_MATRIX) { 3027 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3028 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 3029 } 3030 PetscFunctionReturn(0); 3031 } 3032 3033 #undef __FUNCT__ 3034 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3035 /* 3036 Not great since it makes two copies of the submatrix, first an SeqAIJ 3037 in local and then by concatenating the local matrices the end result. 3038 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3039 3040 Note: This requires a sequential iscol with all indices. 
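
      The sequential iscol is normally produced by MatGetSubMatrix_MPIAIJ() above, which
      gathers the distributed column index set onto every process with ISAllGather()
      before calling this routine; csize gives the requested number of local columns of
      the new matrix (PETSC_DECIDE is also handled below).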
3041 */ 3042 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3043 { 3044 PetscErrorCode ierr; 3045 PetscMPIInt rank,size; 3046 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3047 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3048 Mat *local,M,Mreuse; 3049 MatScalar *vwork,*aa; 3050 MPI_Comm comm = ((PetscObject)mat)->comm; 3051 Mat_SeqAIJ *aij; 3052 3053 3054 PetscFunctionBegin; 3055 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3056 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3057 3058 if (call == MAT_REUSE_MATRIX) { 3059 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3060 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3061 local = &Mreuse; 3062 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3063 } else { 3064 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3065 Mreuse = *local; 3066 ierr = PetscFree(local);CHKERRQ(ierr); 3067 } 3068 3069 /* 3070 m - number of local rows 3071 n - number of columns (same on all processors) 3072 rstart - first row in new global matrix generated 3073 */ 3074 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3075 if (call == MAT_INITIAL_MATRIX) { 3076 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3077 ii = aij->i; 3078 jj = aij->j; 3079 3080 /* 3081 Determine the number of non-zeros in the diagonal and off-diagonal 3082 portions of the matrix in order to do correct preallocation 3083 */ 3084 3085 /* first get start and end of "diagonal" columns */ 3086 if (csize == PETSC_DECIDE) { 3087 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3088 if (mglobal == n) { /* square matrix */ 3089 nlocal = m; 3090 } else { 3091 nlocal = n/size + ((n % size) > rank); 3092 } 3093 } else { 3094 nlocal = csize; 3095 } 3096 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3097 rstart = rend - nlocal; 3098 if (rank == size - 1 && rend != n) { 3099 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3100 } 3101 3102 /* next, compute all the lengths */ 3103 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3104 olens = dlens + m; 3105 for (i=0; i<m; i++) { 3106 jend = ii[i+1] - ii[i]; 3107 olen = 0; 3108 dlen = 0; 3109 for (j=0; j<jend; j++) { 3110 if (*jj < rstart || *jj >= rend) olen++; 3111 else dlen++; 3112 jj++; 3113 } 3114 olens[i] = olen; 3115 dlens[i] = dlen; 3116 } 3117 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3118 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3119 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3120 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3121 ierr = PetscFree(dlens);CHKERRQ(ierr); 3122 } else { 3123 PetscInt ml,nl; 3124 3125 M = *newmat; 3126 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3127 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3128 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3129 /* 3130 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3131 rather than the slower MatSetValues(). 
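      Roughly: was_assembled = PETSC_TRUE lets the insertion path translate global columns
      through the off-diagonal column map built during the previous assembly, while
      assembled = PETSC_FALSE keeps the matrix open for new insertions until the assembly below.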
*/
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij = (Mat_SeqAIJ*)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart,cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);

  ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr);
  ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr);
  ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
  o_nnz = d_nnz + m;

#if defined(PETSC_USE_DEBUG)
  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    JJ  = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
    if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
    for (j=1; j<nnz; j++) {
      if (JJ[j] <= JJ[j-1]) SETERRQ2(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at %D location in column indices",i,j);
    }
  }
#endif

  for (i=0; i<m; i++) {
    nnz     = Ii[i+1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    for (j=0; j<nnz; j++) {
      if (*JJ >= cstart) break;
      JJ++;
    }
    d = 0;
    for (; j<nnz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree(d_nnz);CHKERRQ(ierr);

  if (v) values = (PetscScalar*)v;
  else {
    ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = Ii[i+1] - Ii[i];
    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (!v) {
    ierr = PetscFree(values);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
/*@
   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero); these must be sorted for each row
-  v - optional values in the matrix

   Level: developer

   Notes:
       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of v[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i indices are indices into the local j array.

       The format used for the sparse matrix input is equivalent to a
     row-major ordering, i.e., for the following matrix the input data expected is
     as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]

       The column indices for each row MUST be sorted.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
          MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3320 The size of this array is equal to the number of local rows, i.e 'm'. 3321 You must leave room for the diagonal entry even if it is zero. 3322 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3323 submatrix (same value is used for all local rows). 3324 - o_nnz - array containing the number of nonzeros in the various rows of the 3325 OFF-DIAGONAL portion of the local submatrix (possibly different for 3326 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3327 structure. The size of this array is equal to the number 3328 of local rows, i.e 'm'. 3329 3330 If the *_nnz parameter is given then the *_nz parameter is ignored 3331 3332 The AIJ format (also called the Yale sparse matrix format or 3333 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3334 storage. The stored row and column indices begin with zero. See the users manual for details. 3335 3336 The parallel matrix is partitioned such that the first m0 rows belong to 3337 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3338 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3339 3340 The DIAGONAL portion of the local submatrix of a processor can be defined 3341 as the submatrix which is obtained by extraction the part corresponding 3342 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 3343 first row that belongs to the processor, and r2 is the last row belonging 3344 to the this processor. This is a square mxm matrix. The remaining portion 3345 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 3346 3347 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3348 3349 You can call MatGetInfo() to get information on how effective the preallocation was; 3350 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3351 You can also run with the option -info and look for messages with the string 3352 malloc in them to see if additional memory allocation was needed. 3353 3354 Example usage: 3355 3356 Consider the following 8x8 matrix with 34 non-zero values, that is 3357 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3358 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3359 as follows: 3360 3361 .vb 3362 1 2 0 | 0 3 0 | 0 4 3363 Proc0 0 5 6 | 7 0 0 | 8 0 3364 9 0 10 | 11 0 0 | 12 0 3365 ------------------------------------- 3366 13 0 14 | 15 16 17 | 0 0 3367 Proc1 0 18 0 | 19 20 21 | 0 0 3368 0 0 0 | 22 23 0 | 24 0 3369 ------------------------------------- 3370 Proc2 25 26 27 | 0 0 28 | 29 0 3371 30 0 0 | 31 32 33 | 0 34 3372 .ve 3373 3374 This can be represented as a collection of submatrices as: 3375 3376 .vb 3377 A B C 3378 D E F 3379 G H I 3380 .ve 3381 3382 Where the submatrices A,B,C are owned by proc0, D,E,F are 3383 owned by proc1, G,H,I are owned by proc2. 3384 3385 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3386 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3387 The 'M','N' parameters are 8,8, and have the same values on all procs. 3388 3389 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3390 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3391 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3392 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3393 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3394 matrix, ans [DF] as another SeqAIJ matrix. 
3395 3396 When d_nz, o_nz parameters are specified, d_nz storage elements are 3397 allocated for every row of the local diagonal submatrix, and o_nz 3398 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3399 One way to choose d_nz and o_nz is to use the max nonzerors per local 3400 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3401 In this case, the values of d_nz,o_nz are: 3402 .vb 3403 proc0 : dnz = 2, o_nz = 2 3404 proc1 : dnz = 3, o_nz = 2 3405 proc2 : dnz = 1, o_nz = 4 3406 .ve 3407 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3408 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3409 for proc3. i.e we are using 12+15+10=37 storage locations to store 3410 34 values. 3411 3412 When d_nnz, o_nnz parameters are specified, the storage is specified 3413 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3414 In the above case the values for d_nnz,o_nnz are: 3415 .vb 3416 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3417 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3418 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3419 .ve 3420 Here the space allocated is sum of all the above values i.e 34, and 3421 hence pre-allocation is perfect. 3422 3423 Level: intermediate 3424 3425 .keywords: matrix, aij, compressed row, sparse, parallel 3426 3427 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3428 MPIAIJ, MatGetInfo() 3429 @*/ 3430 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3431 { 3432 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3433 3434 PetscFunctionBegin; 3435 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3436 if (f) { 3437 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3438 } 3439 PetscFunctionReturn(0); 3440 } 3441 3442 #undef __FUNCT__ 3443 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3444 /*@ 3445 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3446 CSR format the local rows. 3447 3448 Collective on MPI_Comm 3449 3450 Input Parameters: 3451 + comm - MPI communicator 3452 . m - number of local rows (Cannot be PETSC_DECIDE) 3453 . n - This value should be the same as the local size used in creating the 3454 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3455 calculated if N is given) For square matrices n is almost always m. 3456 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3457 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3458 . i - row indices 3459 . j - column indices 3460 - a - matrix values 3461 3462 Output Parameter: 3463 . mat - the matrix 3464 3465 Level: intermediate 3466 3467 Notes: 3468 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3469 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3470 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3471 3472 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3473 3474 The format which is used for the sparse matrix input, is equivalent to a 3475 row-major ordering.. 
i.e for the following matrix, the input data expected is 3476 as shown: 3477 3478 1 0 0 3479 2 0 3 P0 3480 ------- 3481 4 5 6 P1 3482 3483 Process0 [P0]: rows_owned=[0,1] 3484 i = {0,1,3} [size = nrow+1 = 2+1] 3485 j = {0,0,2} [size = nz = 6] 3486 v = {1,2,3} [size = nz = 6] 3487 3488 Process1 [P1]: rows_owned=[2] 3489 i = {0,3} [size = nrow+1 = 1+1] 3490 j = {0,1,2} [size = nz = 6] 3491 v = {4,5,6} [size = nz = 6] 3492 3493 .keywords: matrix, aij, compressed row, sparse, parallel 3494 3495 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3496 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3497 @*/ 3498 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3499 { 3500 PetscErrorCode ierr; 3501 3502 PetscFunctionBegin; 3503 if (i[0]) { 3504 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3505 } 3506 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3507 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3508 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3509 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3510 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3511 PetscFunctionReturn(0); 3512 } 3513 3514 #undef __FUNCT__ 3515 #define __FUNCT__ "MatCreateMPIAIJ" 3516 /*@C 3517 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3518 (the default parallel PETSc format). For good matrix assembly performance 3519 the user should preallocate the matrix storage by setting the parameters 3520 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3521 performance can be increased by more than a factor of 50. 3522 3523 Collective on MPI_Comm 3524 3525 Input Parameters: 3526 + comm - MPI communicator 3527 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3528 This value should be the same as the local size used in creating the 3529 y vector for the matrix-vector product y = Ax. 3530 . n - This value should be the same as the local size used in creating the 3531 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3532 calculated if N is given) For square matrices n is almost always m. 3533 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3534 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3535 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3536 (same value is used for all local rows) 3537 . d_nnz - array containing the number of nonzeros in the various rows of the 3538 DIAGONAL portion of the local submatrix (possibly different for each row) 3539 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3540 The size of this array is equal to the number of local rows, i.e 'm'. 3541 You must leave room for the diagonal entry even if it is zero. 3542 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3543 submatrix (same value is used for all local rows). 3544 - o_nnz - array containing the number of nonzeros in the various rows of the 3545 OFF-DIAGONAL portion of the local submatrix (possibly different for 3546 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3547 structure. The size of this array is equal to the number 3548 of local rows, i.e 'm'. 
3549 3550 Output Parameter: 3551 . A - the matrix 3552 3553 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3554 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3555 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3556 3557 Notes: 3558 If the *_nnz parameter is given then the *_nz parameter is ignored 3559 3560 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3561 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3562 storage requirements for this matrix. 3563 3564 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3565 processor than it must be used on all processors that share the object for 3566 that argument. 3567 3568 The user MUST specify either the local or global matrix dimensions 3569 (possibly both). 3570 3571 The parallel matrix is partitioned across processors such that the 3572 first m0 rows belong to process 0, the next m1 rows belong to 3573 process 1, the next m2 rows belong to process 2 etc.. where 3574 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3575 values corresponding to [m x N] submatrix. 3576 3577 The columns are logically partitioned with the n0 columns belonging 3578 to 0th partition, the next n1 columns belonging to the next 3579 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 3580 3581 The DIAGONAL portion of the local submatrix on any given processor 3582 is the submatrix corresponding to the rows and columns m,n 3583 corresponding to the given processor. i.e diagonal matrix on 3584 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3585 etc. The remaining portion of the local submatrix [m x (N-n)] 3586 constitute the OFF-DIAGONAL portion. The example below better 3587 illustrates this concept. 3588 3589 For a square global matrix we define each processor's diagonal portion 3590 to be its local rows and the corresponding columns (a square submatrix); 3591 each processor's off-diagonal portion encompasses the remainder of the 3592 local matrix (a rectangular submatrix). 3593 3594 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3595 3596 When calling this routine with a single process communicator, a matrix of 3597 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3598 type of communicator, use the construction mechanism: 3599 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3600 3601 By default, this format uses inodes (identical nodes) when possible. 3602 We search for consecutive rows with the same nonzero structure, thereby 3603 reusing matrix information to achieve increased efficiency. 3604 3605 Options Database Keys: 3606 + -mat_no_inode - Do not use inodes 3607 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3608 - -mat_aij_oneindex - Internally use indexing starting at 1 3609 rather than 0. Note that when calling MatSetValues(), 3610 the user still MUST index entries starting at 0! 3611 3612 3613 Example usage: 3614 3615 Consider the following 8x8 matrix with 34 non-zero values, that is 3616 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3617 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g. proc1 will store [E] as one SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows of the local DIAGONAL and OFF-DIAGONAL submatrices respectively.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e. 34, and
   hence the preallocation is perfect.
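
   As an illustrative sketch (not part of the example above; error handling and the
   matrix names are hypothetical), process 0 of the layout shown could create the
   matrix with the per-row preallocation like this, the other processes making the
   same call with their own local sizes and d_nnz/o_nnz arrays:
.vb
     Mat      C;
     PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};   /* the proc0 values listed above */

     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&C);CHKERRQ(ierr);
     /* since d_nnz/o_nnz are given, the d_nz/o_nz arguments (here 0) are ignored */
.ve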
3681 3682 Level: intermediate 3683 3684 .keywords: matrix, aij, compressed row, sparse, parallel 3685 3686 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3687 MPIAIJ, MatCreateMPIAIJWithArrays() 3688 @*/ 3689 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3690 { 3691 PetscErrorCode ierr; 3692 PetscMPIInt size; 3693 3694 PetscFunctionBegin; 3695 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3696 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3697 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3698 if (size > 1) { 3699 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3700 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3701 } else { 3702 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3703 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3704 } 3705 PetscFunctionReturn(0); 3706 } 3707 3708 #undef __FUNCT__ 3709 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3710 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3711 { 3712 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3713 3714 PetscFunctionBegin; 3715 *Ad = a->A; 3716 *Ao = a->B; 3717 *colmap = a->garray; 3718 PetscFunctionReturn(0); 3719 } 3720 3721 #undef __FUNCT__ 3722 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3723 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3724 { 3725 PetscErrorCode ierr; 3726 PetscInt i; 3727 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3728 3729 PetscFunctionBegin; 3730 if (coloring->ctype == IS_COLORING_GLOBAL) { 3731 ISColoringValue *allcolors,*colors; 3732 ISColoring ocoloring; 3733 3734 /* set coloring for diagonal portion */ 3735 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3736 3737 /* set coloring for off-diagonal portion */ 3738 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3739 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3740 for (i=0; i<a->B->cmap->n; i++) { 3741 colors[i] = allcolors[a->garray[i]]; 3742 } 3743 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3744 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3745 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3746 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3747 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3748 ISColoringValue *colors; 3749 PetscInt *larray; 3750 ISColoring ocoloring; 3751 3752 /* set coloring for diagonal portion */ 3753 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3754 for (i=0; i<a->A->cmap->n; i++) { 3755 larray[i] = i + A->cmap->rstart; 3756 } 3757 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3758 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3759 for (i=0; i<a->A->cmap->n; i++) { 3760 colors[i] = coloring->colors[larray[i]]; 3761 } 3762 ierr = PetscFree(larray);CHKERRQ(ierr); 3763 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3764 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3765 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3766 3767 /* set coloring for off-diagonal portion */ 3768 ierr = 
PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3769 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3770 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3771 for (i=0; i<a->B->cmap->n; i++) { 3772 colors[i] = coloring->colors[larray[i]]; 3773 } 3774 ierr = PetscFree(larray);CHKERRQ(ierr); 3775 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3776 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3777 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3778 } else { 3779 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3780 } 3781 3782 PetscFunctionReturn(0); 3783 } 3784 3785 #if defined(PETSC_HAVE_ADIC) 3786 #undef __FUNCT__ 3787 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3788 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3789 { 3790 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3791 PetscErrorCode ierr; 3792 3793 PetscFunctionBegin; 3794 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3795 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3796 PetscFunctionReturn(0); 3797 } 3798 #endif 3799 3800 #undef __FUNCT__ 3801 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3802 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3803 { 3804 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3805 PetscErrorCode ierr; 3806 3807 PetscFunctionBegin; 3808 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3809 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3810 PetscFunctionReturn(0); 3811 } 3812 3813 #undef __FUNCT__ 3814 #define __FUNCT__ "MatMerge" 3815 /*@ 3816 MatMerge - Creates a single large PETSc matrix by concatinating sequential 3817 matrices from each processor 3818 3819 Collective on MPI_Comm 3820 3821 Input Parameters: 3822 + comm - the communicators the parallel matrix will live on 3823 . inmat - the input sequential matrices 3824 . n - number of local columns (or PETSC_DECIDE) 3825 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3826 3827 Output Parameter: 3828 . outmat - the parallel matrix generated 3829 3830 Level: advanced 3831 3832 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
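
   A minimal usage sketch (illustrative only; error handling omitted, and 'localmat' is a
   hypothetical SeqAIJ matrix holding this process's rows; note that MatMerge() destroys
   the input matrix):
.vb
     Mat globalmat;
     ierr = MatMerge(PETSC_COMM_WORLD,localmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&globalmat);CHKERRQ(ierr);
.ve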
3833 3834 @*/ 3835 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3836 { 3837 PetscErrorCode ierr; 3838 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3839 PetscInt *indx; 3840 PetscScalar *values; 3841 3842 PetscFunctionBegin; 3843 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3844 if (scall == MAT_INITIAL_MATRIX){ 3845 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3846 if (n == PETSC_DECIDE){ 3847 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3848 } 3849 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3850 rstart -= m; 3851 3852 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3853 for (i=0;i<m;i++) { 3854 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3855 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3856 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3857 } 3858 /* This routine will ONLY return MPIAIJ type matrix */ 3859 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3860 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3861 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3862 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3863 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3864 3865 } else if (scall == MAT_REUSE_MATRIX){ 3866 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3867 } else { 3868 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3869 } 3870 3871 for (i=0;i<m;i++) { 3872 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3873 Ii = i + rstart; 3874 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3875 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3876 } 3877 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3878 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3879 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3880 3881 PetscFunctionReturn(0); 3882 } 3883 3884 #undef __FUNCT__ 3885 #define __FUNCT__ "MatFileSplit" 3886 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3887 { 3888 PetscErrorCode ierr; 3889 PetscMPIInt rank; 3890 PetscInt m,N,i,rstart,nnz; 3891 size_t len; 3892 const PetscInt *indx; 3893 PetscViewer out; 3894 char *name; 3895 Mat B; 3896 const PetscScalar *values; 3897 3898 PetscFunctionBegin; 3899 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3900 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3901 /* Should this be the type of the diagonal block of A? 
*/ 3902 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3903 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3904 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3905 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3906 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3907 for (i=0;i<m;i++) { 3908 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3909 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3910 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3911 } 3912 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3913 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3914 3915 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 3916 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3917 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3918 sprintf(name,"%s.%d",outfile,rank); 3919 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3920 ierr = PetscFree(name); 3921 ierr = MatView(B,out);CHKERRQ(ierr); 3922 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3923 ierr = MatDestroy(B);CHKERRQ(ierr); 3924 PetscFunctionReturn(0); 3925 } 3926 3927 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3928 #undef __FUNCT__ 3929 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3930 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3931 { 3932 PetscErrorCode ierr; 3933 Mat_Merge_SeqsToMPI *merge; 3934 PetscContainer container; 3935 3936 PetscFunctionBegin; 3937 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3938 if (container) { 3939 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3940 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3941 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3942 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3943 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3944 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3945 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3946 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3947 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3948 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3949 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3950 ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr); 3951 3952 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 3953 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3954 } 3955 ierr = PetscFree(merge);CHKERRQ(ierr); 3956 3957 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3958 PetscFunctionReturn(0); 3959 } 3960 3961 #include "../src/mat/utils/freespace.h" 3962 #include "petscbt.h" 3963 3964 #undef __FUNCT__ 3965 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 3966 /*@C 3967 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 3968 matrices from each processor 3969 3970 Collective on MPI_Comm 3971 3972 Input Parameters: 3973 + comm - the communicators the parallel matrix will live on 3974 . seqmat - the input sequential matrices 3975 . m - number of local rows (or PETSC_DECIDE) 3976 . n - number of local columns (or PETSC_DECIDE) 3977 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3978 3979 Output Parameter: 3980 . mpimat - the parallel matrix generated 3981 3982 Level: advanced 3983 3984 Notes: 3985 The dimensions of the sequential matrix in each processor MUST be the same. 3986 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 3987 destroyed when mpimat is destroyed. 
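   A sketch of retrieving that container (the key string below is the one this file
   composes on mpimat; the variable names are illustrative):
.vb
     PetscContainer container;
     ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
.ve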
Call PetscObjectQuery() to access seqmat. 3988 @*/ 3989 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 3990 { 3991 PetscErrorCode ierr; 3992 MPI_Comm comm=((PetscObject)mpimat)->comm; 3993 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3994 PetscMPIInt size,rank,taga,*len_s; 3995 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 3996 PetscInt proc,m; 3997 PetscInt **buf_ri,**buf_rj; 3998 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3999 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4000 MPI_Request *s_waits,*r_waits; 4001 MPI_Status *status; 4002 MatScalar *aa=a->a; 4003 MatScalar **abuf_r,*ba_i; 4004 Mat_Merge_SeqsToMPI *merge; 4005 PetscContainer container; 4006 4007 PetscFunctionBegin; 4008 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4009 4010 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4011 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4012 4013 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4014 if (container) { 4015 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4016 } 4017 bi = merge->bi; 4018 bj = merge->bj; 4019 buf_ri = merge->buf_ri; 4020 buf_rj = merge->buf_rj; 4021 4022 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4023 owners = merge->rowmap.range; 4024 len_s = merge->len_s; 4025 4026 /* send and recv matrix values */ 4027 /*-----------------------------*/ 4028 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4029 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4030 4031 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4032 for (proc=0,k=0; proc<size; proc++){ 4033 if (!len_s[proc]) continue; 4034 i = owners[proc]; 4035 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4036 k++; 4037 } 4038 4039 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4040 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4041 ierr = PetscFree(status);CHKERRQ(ierr); 4042 4043 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4044 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4045 4046 /* insert mat values of mpimat */ 4047 /*----------------------------*/ 4048 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4049 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 4050 nextrow = buf_ri_k + merge->nrecv; 4051 nextai = nextrow + merge->nrecv; 4052 4053 for (k=0; k<merge->nrecv; k++){ 4054 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4055 nrows = *(buf_ri_k[k]); 4056 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4057 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4058 } 4059 4060 /* set values of ba */ 4061 m = merge->rowmap.n; 4062 for (i=0; i<m; i++) { 4063 arow = owners[rank] + i; 4064 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4065 bnzi = bi[i+1] - bi[i]; 4066 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4067 4068 /* add local non-zero vals of this proc's seqmat into ba */ 4069 anzi = ai[arow+1] - ai[arow]; 4070 aj = a->j + ai[arow]; 4071 aa = a->a + ai[arow]; 4072 nextaj = 0; 4073 for (j=0; nextaj<anzi; j++){ 4074 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4075 ba_i[j] += aa[nextaj++]; 4076 } 4077 } 4078 4079 /* add 
received vals into ba */ 4080 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4081 /* i-th row */ 4082 if (i == *nextrow[k]) { 4083 anzi = *(nextai[k]+1) - *nextai[k]; 4084 aj = buf_rj[k] + *(nextai[k]); 4085 aa = abuf_r[k] + *(nextai[k]); 4086 nextaj = 0; 4087 for (j=0; nextaj<anzi; j++){ 4088 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4089 ba_i[j] += aa[nextaj++]; 4090 } 4091 } 4092 nextrow[k]++; nextai[k]++; 4093 } 4094 } 4095 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4096 } 4097 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4098 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4099 4100 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4101 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4102 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 4103 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4104 PetscFunctionReturn(0); 4105 } 4106 4107 #undef __FUNCT__ 4108 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4109 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4110 { 4111 PetscErrorCode ierr; 4112 Mat B_mpi; 4113 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4114 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4115 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4116 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4117 PetscInt len,proc,*dnz,*onz; 4118 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4119 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4120 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4121 MPI_Status *status; 4122 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4123 PetscBT lnkbt; 4124 Mat_Merge_SeqsToMPI *merge; 4125 PetscContainer container; 4126 4127 PetscFunctionBegin; 4128 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4129 4130 /* make sure it is a PETSc comm */ 4131 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4132 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4133 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4134 4135 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4136 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4137 4138 /* determine row ownership */ 4139 /*---------------------------------------------------------*/ 4140 ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 4141 merge->rowmap.n = m; 4142 merge->rowmap.N = M; 4143 merge->rowmap.bs = 1; 4144 ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr); 4145 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4146 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4147 4148 m = merge->rowmap.n; 4149 M = merge->rowmap.N; 4150 owners = merge->rowmap.range; 4151 4152 /* determine the number of messages to send, their lengths */ 4153 /*---------------------------------------------------------*/ 4154 len_s = merge->len_s; 4155 4156 len = 0; /* length of buf_si[] */ 4157 merge->nsend = 0; 4158 for (proc=0; proc<size; proc++){ 4159 len_si[proc] = 0; 4160 if (proc == rank){ 4161 len_s[proc] = 0; 4162 } else { 4163 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4164 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4165 } 4166 if (len_s[proc]) { 4167 merge->nsend++; 4168 nrows = 0; 4169 for (i=owners[proc]; i<owners[proc+1]; i++){ 4170 if (ai[i+1] > ai[i]) nrows++; 4171 } 4172 len_si[proc] = 2*(nrows+1); 4173 len += 
len_si[proc]; 4174 } 4175 } 4176 4177 /* determine the number and length of messages to receive for ij-structure */ 4178 /*-------------------------------------------------------------------------*/ 4179 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4180 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4181 4182 /* post the Irecv of j-structure */ 4183 /*-------------------------------*/ 4184 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4185 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4186 4187 /* post the Isend of j-structure */ 4188 /*--------------------------------*/ 4189 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 4190 sj_waits = si_waits + merge->nsend; 4191 4192 for (proc=0, k=0; proc<size; proc++){ 4193 if (!len_s[proc]) continue; 4194 i = owners[proc]; 4195 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4196 k++; 4197 } 4198 4199 /* receives and sends of j-structure are complete */ 4200 /*------------------------------------------------*/ 4201 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4202 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4203 4204 /* send and recv i-structure */ 4205 /*---------------------------*/ 4206 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4207 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4208 4209 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4210 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4211 for (proc=0,k=0; proc<size; proc++){ 4212 if (!len_s[proc]) continue; 4213 /* form outgoing message for i-structure: 4214 buf_si[0]: nrows to be sent 4215 [1:nrows]: row index (global) 4216 [nrows+1:2*nrows+1]: i-structure index 4217 */ 4218 /*-------------------------------------------*/ 4219 nrows = len_si[proc]/2 - 1; 4220 buf_si_i = buf_si + nrows+1; 4221 buf_si[0] = nrows; 4222 buf_si_i[0] = 0; 4223 nrows = 0; 4224 for (i=owners[proc]; i<owners[proc+1]; i++){ 4225 anzi = ai[i+1] - ai[i]; 4226 if (anzi) { 4227 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4228 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4229 nrows++; 4230 } 4231 } 4232 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4233 k++; 4234 buf_si += len_si[proc]; 4235 } 4236 4237 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4238 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4239 4240 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4241 for (i=0; i<merge->nrecv; i++){ 4242 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4243 } 4244 4245 ierr = PetscFree(len_si);CHKERRQ(ierr); 4246 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4247 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4248 ierr = PetscFree(si_waits);CHKERRQ(ierr); 4249 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4250 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4251 ierr = PetscFree(status);CHKERRQ(ierr); 4252 4253 /* compute a local seq matrix in each processor */ 4254 /*----------------------------------------------*/ 4255 /* allocate bi array and 
free space for accumulating nonzero column info */ 4256 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4257 bi[0] = 0; 4258 4259 /* create and initialize a linked list */ 4260 nlnk = N+1; 4261 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4262 4263 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4264 len = 0; 4265 len = ai[owners[rank+1]] - ai[owners[rank]]; 4266 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4267 current_space = free_space; 4268 4269 /* determine symbolic info for each local row */ 4270 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 4271 nextrow = buf_ri_k + merge->nrecv; 4272 nextai = nextrow + merge->nrecv; 4273 for (k=0; k<merge->nrecv; k++){ 4274 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4275 nrows = *buf_ri_k[k]; 4276 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4277 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4278 } 4279 4280 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4281 len = 0; 4282 for (i=0;i<m;i++) { 4283 bnzi = 0; 4284 /* add local non-zero cols of this proc's seqmat into lnk */ 4285 arow = owners[rank] + i; 4286 anzi = ai[arow+1] - ai[arow]; 4287 aj = a->j + ai[arow]; 4288 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4289 bnzi += nlnk; 4290 /* add received col data into lnk */ 4291 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4292 if (i == *nextrow[k]) { /* i-th row */ 4293 anzi = *(nextai[k]+1) - *nextai[k]; 4294 aj = buf_rj[k] + *nextai[k]; 4295 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4296 bnzi += nlnk; 4297 nextrow[k]++; nextai[k]++; 4298 } 4299 } 4300 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4301 4302 /* if free space is not available, make more free space */ 4303 if (current_space->local_remaining<bnzi) { 4304 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4305 nspacedouble++; 4306 } 4307 /* copy data into free space, then initialize lnk */ 4308 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4309 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4310 4311 current_space->array += bnzi; 4312 current_space->local_used += bnzi; 4313 current_space->local_remaining -= bnzi; 4314 4315 bi[i+1] = bi[i] + bnzi; 4316 } 4317 4318 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 4319 4320 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4321 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4322 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4323 4324 /* create symbolic parallel matrix B_mpi */ 4325 /*---------------------------------------*/ 4326 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4327 if (n==PETSC_DECIDE) { 4328 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4329 } else { 4330 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4331 } 4332 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4333 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4334 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4335 4336 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4337 B_mpi->assembled = PETSC_FALSE; 4338 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4339 merge->bi = bi; 4340 merge->bj = bj; 4341 merge->buf_ri = buf_ri; 4342 merge->buf_rj = 
buf_rj; 4343 merge->coi = PETSC_NULL; 4344 merge->coj = PETSC_NULL; 4345 merge->owners_co = PETSC_NULL; 4346 4347 /* attach the supporting struct to B_mpi for reuse */ 4348 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4349 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4350 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4351 *mpimat = B_mpi; 4352 4353 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4354 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4355 PetscFunctionReturn(0); 4356 } 4357 4358 #undef __FUNCT__ 4359 #define __FUNCT__ "MatMerge_SeqsToMPI" 4360 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4361 { 4362 PetscErrorCode ierr; 4363 4364 PetscFunctionBegin; 4365 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4366 if (scall == MAT_INITIAL_MATRIX){ 4367 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4368 } 4369 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4370 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4371 PetscFunctionReturn(0); 4372 } 4373 4374 #undef __FUNCT__ 4375 #define __FUNCT__ "MatGetLocalMat" 4376 /*@ 4377 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4378 4379 Not Collective 4380 4381 Input Parameters: 4382 + A - the matrix 4383 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4384 4385 Output Parameter: 4386 . A_loc - the local sequential matrix generated 4387 4388 Level: developer 4389 4390 @*/ 4391 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4392 { 4393 PetscErrorCode ierr; 4394 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4395 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4396 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4397 MatScalar *aa=a->a,*ba=b->a,*cam; 4398 PetscScalar *ca; 4399 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4400 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4401 4402 PetscFunctionBegin; 4403 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4404 if (scall == MAT_INITIAL_MATRIX){ 4405 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4406 ci[0] = 0; 4407 for (i=0; i<am; i++){ 4408 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4409 } 4410 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4411 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4412 k = 0; 4413 for (i=0; i<am; i++) { 4414 ncols_o = bi[i+1] - bi[i]; 4415 ncols_d = ai[i+1] - ai[i]; 4416 /* off-diagonal portion of A */ 4417 for (jo=0; jo<ncols_o; jo++) { 4418 col = cmap[*bj]; 4419 if (col >= cstart) break; 4420 cj[k] = col; bj++; 4421 ca[k++] = *ba++; 4422 } 4423 /* diagonal portion of A */ 4424 for (j=0; j<ncols_d; j++) { 4425 cj[k] = cstart + *aj++; 4426 ca[k++] = *aa++; 4427 } 4428 /* off-diagonal portion of A */ 4429 for (j=jo; j<ncols_o; j++) { 4430 cj[k] = cmap[*bj++]; 4431 ca[k++] = *ba++; 4432 } 4433 } 4434 /* put together the new matrix */ 4435 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4436 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4437 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 4438 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4439 mat->free_a = PETSC_TRUE; 4440 mat->free_ij = PETSC_TRUE; 4441 mat->nonew = 0; 4442 } else if (scall == MAT_REUSE_MATRIX){ 4443 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4444 ci = mat->i; cj = mat->j; cam = mat->a; 4445 for (i=0; i<am; i++) { 4446 /* off-diagonal portion of A */ 4447 ncols_o = bi[i+1] - bi[i]; 4448 for (jo=0; jo<ncols_o; jo++) { 4449 col = cmap[*bj]; 4450 if (col >= cstart) break; 4451 *cam++ = *ba++; bj++; 4452 } 4453 /* diagonal portion of A */ 4454 ncols_d = ai[i+1] - ai[i]; 4455 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4456 /* off-diagonal portion of A */ 4457 for (j=jo; j<ncols_o; j++) { 4458 *cam++ = *ba++; bj++; 4459 } 4460 } 4461 } else { 4462 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4463 } 4464 4465 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4466 PetscFunctionReturn(0); 4467 } 4468 4469 #undef __FUNCT__ 4470 #define __FUNCT__ "MatGetLocalMatCondensed" 4471 /*@C 4472 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4473 4474 Not Collective 4475 4476 Input Parameters: 4477 + A - the matrix 4478 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4479 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4480 4481 Output Parameter: 4482 . A_loc - the local sequential matrix generated 4483 4484 Level: developer 4485 4486 @*/ 4487 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4488 { 4489 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4490 PetscErrorCode ierr; 4491 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4492 IS isrowa,iscola; 4493 Mat *aloc; 4494 4495 PetscFunctionBegin; 4496 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4497 if (!row){ 4498 start = A->rmap->rstart; end = A->rmap->rend; 4499 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4500 } else { 4501 isrowa = *row; 4502 } 4503 if (!col){ 4504 start = A->cmap->rstart; 4505 cmap = a->garray; 4506 nzA = a->A->cmap->n; 4507 nzB = a->B->cmap->n; 4508 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4509 ncols = 0; 4510 for (i=0; i<nzB; i++) { 4511 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4512 else break; 4513 } 4514 imark = i; 4515 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4516 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4517 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4518 ierr = PetscFree(idx);CHKERRQ(ierr); 4519 } else { 4520 iscola = *col; 4521 } 4522 if (scall != MAT_INITIAL_MATRIX){ 4523 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4524 aloc[0] = *A_loc; 4525 } 4526 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4527 *A_loc = aloc[0]; 4528 ierr = PetscFree(aloc);CHKERRQ(ierr); 4529 if (!row){ 4530 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4531 } 4532 if (!col){ 4533 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4534 } 4535 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4536 PetscFunctionReturn(0); 4537 } 4538 4539 #undef __FUNCT__ 4540 #define __FUNCT__ "MatGetBrowsOfAcols" 4541 /*@C 4542 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4543 4544 Collective on Mat 4545 4546 Input Parameters: 4547 + A,B - the matrices in mpiaij format 4548 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4549 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4550 4551 Output Parameter: 4552 + rowb, colb - index sets of rows and columns of B to extract 4553 . brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows 4554 - B_seq - the sequential matrix generated 4555 4556 Level: developer 4557 4558 @*/ 4559 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4560 { 4561 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4562 PetscErrorCode ierr; 4563 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4564 IS isrowb,iscolb; 4565 Mat *bseq; 4566 4567 PetscFunctionBegin; 4568 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4569 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4570 } 4571 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4572 4573 if (scall == MAT_INITIAL_MATRIX){ 4574 start = A->cmap->rstart; 4575 cmap = a->garray; 4576 nzA = a->A->cmap->n; 4577 nzB = a->B->cmap->n; 4578 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4579 ncols = 0; 4580 for (i=0; i<nzB; i++) { /* row < local row index */ 4581 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4582 else break; 4583 } 4584 imark = i; 4585 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4586 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4587 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4588 ierr = PetscFree(idx);CHKERRQ(ierr); 4589 *brstart = imark; 4590 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4591 } else { 4592 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4593 isrowb = *rowb; iscolb = *colb; 4594 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4595 bseq[0] = *B_seq; 4596 } 4597 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4598 *B_seq = bseq[0]; 4599 ierr = PetscFree(bseq);CHKERRQ(ierr); 4600 if (!rowb){ 4601 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4602 } else { 4603 *rowb = isrowb; 4604 } 4605 if (!colb){ 4606 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4607 } else { 4608 *colb = iscolb; 4609 } 4610 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4611 PetscFunctionReturn(0); 4612 } 4613 4614 #undef __FUNCT__ 4615 #define __FUNCT__ "MatGetBrowsOfAoCols" 4616 /*@C 4617 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4618 of the OFF-DIAGONAL portion of local A 4619 4620 Collective on Mat 4621 4622 Input Parameters: 4623 + A,B - the matrices in mpiaij format 4624 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4625 . 
startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4626 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4627 4628 Output Parameter: 4629 + B_oth - the sequential matrix generated 4630 4631 Level: developer 4632 4633 @*/ 4634 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth) 4635 { 4636 VecScatter_MPI_General *gen_to,*gen_from; 4637 PetscErrorCode ierr; 4638 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4639 Mat_SeqAIJ *b_oth; 4640 VecScatter ctx=a->Mvctx; 4641 MPI_Comm comm=((PetscObject)ctx)->comm; 4642 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4643 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4644 PetscScalar *rvalues,*svalues; 4645 MatScalar *b_otha,*bufa,*bufA; 4646 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4647 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4648 MPI_Status *sstatus,rstatus; 4649 PetscMPIInt jj; 4650 PetscInt *cols,sbs,rbs; 4651 PetscScalar *vals; 4652 4653 PetscFunctionBegin; 4654 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4655 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4656 } 4657 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4658 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4659 4660 gen_to = (VecScatter_MPI_General*)ctx->todata; 4661 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4662 rvalues = gen_from->values; /* holds the length of receiving row */ 4663 svalues = gen_to->values; /* holds the length of sending row */ 4664 nrecvs = gen_from->n; 4665 nsends = gen_to->n; 4666 4667 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4668 srow = gen_to->indices; /* local row index to be sent */ 4669 sstarts = gen_to->starts; 4670 sprocs = gen_to->procs; 4671 sstatus = gen_to->sstatus; 4672 sbs = gen_to->bs; 4673 rstarts = gen_from->starts; 4674 rprocs = gen_from->procs; 4675 rbs = gen_from->bs; 4676 4677 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4678 if (scall == MAT_INITIAL_MATRIX){ 4679 /* i-array */ 4680 /*---------*/ 4681 /* post receives */ 4682 for (i=0; i<nrecvs; i++){ 4683 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4684 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4685 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4686 } 4687 4688 /* pack the outgoing message */ 4689 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 4690 rstartsj = sstartsj + nsends +1; 4691 sstartsj[0] = 0; rstartsj[0] = 0; 4692 len = 0; /* total length of j or a array to be sent */ 4693 k = 0; 4694 for (i=0; i<nsends; i++){ 4695 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4696 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4697 for (j=0; j<nrows; j++) { 4698 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4699 for (l=0; l<sbs; l++){ 4700 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4701 rowlen[j*sbs+l] = ncols; 4702 len += ncols; 4703 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4704 } 4705 k++; 4706 } 4707 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4708 
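      /* len has accumulated the nonzeros of every row packed so far (it is not reset between sends),
         so sstartsj[] ends up holding prefix sums: sstartsj[i+1]-sstartsj[i] is the length of the
         j- and a-array messages destined for process sprocs[i] */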
sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4709 } 4710 /* recvs and sends of i-array are completed */ 4711 i = nrecvs; 4712 while (i--) { 4713 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4714 } 4715 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4716 4717 /* allocate buffers for sending j and a arrays */ 4718 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4719 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4720 4721 /* create i-array of B_oth */ 4722 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4723 b_othi[0] = 0; 4724 len = 0; /* total length of j or a array to be received */ 4725 k = 0; 4726 for (i=0; i<nrecvs; i++){ 4727 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4728 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 4729 for (j=0; j<nrows; j++) { 4730 b_othi[k+1] = b_othi[k] + rowlen[j]; 4731 len += rowlen[j]; k++; 4732 } 4733 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4734 } 4735 4736 /* allocate space for j and a arrays of B_oth */ 4737 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4738 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 4739 4740 /* j-array */ 4741 /*---------*/ 4742 /* post receives of j-array */ 4743 for (i=0; i<nrecvs; i++){ 4744 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4745 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4746 } 4747 4748 /* pack the outgoing message j-array */ 4749 k = 0; 4750 for (i=0; i<nsends; i++){ 4751 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4752 bufJ = bufj+sstartsj[i]; 4753 for (j=0; j<nrows; j++) { 4754 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4755 for (ll=0; ll<sbs; ll++){ 4756 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4757 for (l=0; l<ncols; l++){ 4758 *bufJ++ = cols[l]; 4759 } 4760 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4761 } 4762 } 4763 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4764 } 4765 4766 /* recvs and sends of j-array are completed */ 4767 i = nrecvs; 4768 while (i--) { 4769 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4770 } 4771 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4772 } else if (scall == MAT_REUSE_MATRIX){ 4773 sstartsj = *startsj; 4774 rstartsj = sstartsj + nsends +1; 4775 bufa = *bufa_ptr; 4776 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4777 b_otha = b_oth->a; 4778 } else { 4779 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4780 } 4781 4782 /* a-array */ 4783 /*---------*/ 4784 /* post receives of a-array */ 4785 for (i=0; i<nrecvs; i++){ 4786 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4787 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4788 } 4789 4790 /* pack the outgoing message a-array */ 4791 k = 0; 4792 for (i=0; i<nsends; i++){ 4793 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4794 bufA = bufa+sstartsj[i]; 4795 for (j=0; j<nrows; j++) { 4796 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4797 for (ll=0; ll<sbs; ll++){ 4798 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4799 for (l=0; l<ncols; l++){ 
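              /* append vals[l], an entry of global row row+ll of B, to the outgoing a-array message for sprocs[i] */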
4800 *bufA++ = vals[l]; 4801 } 4802 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4803 } 4804 } 4805 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4806 } 4807 /* recvs and sends of a-array are completed */ 4808 i = nrecvs; 4809 while (i--) { 4810 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4811 } 4812 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4813 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4814 4815 if (scall == MAT_INITIAL_MATRIX){ 4816 /* put together the new matrix */ 4817 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4818 4819 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4820 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4821 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4822 b_oth->free_a = PETSC_TRUE; 4823 b_oth->free_ij = PETSC_TRUE; 4824 b_oth->nonew = 0; 4825 4826 ierr = PetscFree(bufj);CHKERRQ(ierr); 4827 if (!startsj || !bufa_ptr){ 4828 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 4829 ierr = PetscFree(bufa);CHKERRQ(ierr); 4830 } else { 4831 *startsj = sstartsj; 4832 *bufa_ptr = bufa; 4833 } 4834 } 4835 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4836 PetscFunctionReturn(0); 4837 } 4838 4839 #undef __FUNCT__ 4840 #define __FUNCT__ "MatGetCommunicationStructs" 4841 /*@C 4842 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4843 4844 Not Collective 4845 4846 Input Parameter: 4847 . A - The matrix in mpiaij format 4848 4849 Output Parameters: 4850 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4851 . 
colmap - A map from global column index to local index into lvec 4852 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4853 4854 Level: developer 4855 4856 @*/ 4857 #if defined (PETSC_USE_CTABLE) 4858 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4859 #else 4860 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4861 #endif 4862 { 4863 Mat_MPIAIJ *a; 4864 4865 PetscFunctionBegin; 4866 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 4867 PetscValidPointer(lvec, 2) 4868 PetscValidPointer(colmap, 3) 4869 PetscValidPointer(multScatter, 4) 4870 a = (Mat_MPIAIJ *) A->data; 4871 if (lvec) *lvec = a->lvec; 4872 if (colmap) *colmap = a->colmap; 4873 if (multScatter) *multScatter = a->Mvctx; 4874 PetscFunctionReturn(0); 4875 } 4876 4877 EXTERN_C_BEGIN 4878 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*); 4879 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*); 4880 EXTERN_C_END 4881 4882 #include "../src/mat/impls/dense/mpi/mpidense.h" 4883 4884 #undef __FUNCT__ 4885 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4886 /* 4887 Computes (B'*A')' since computing B*A directly is untenable 4888 4889 n p p 4890 ( ) ( ) ( ) 4891 m ( A ) * n ( B ) = m ( C ) 4892 ( ) ( ) ( ) 4893 4894 */ 4895 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4896 { 4897 PetscErrorCode ierr; 4898 Mat At,Bt,Ct; 4899 4900 PetscFunctionBegin; 4901 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4902 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4903 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4904 ierr = MatDestroy(At);CHKERRQ(ierr); 4905 ierr = MatDestroy(Bt);CHKERRQ(ierr); 4906 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4907 ierr = MatDestroy(Ct);CHKERRQ(ierr); 4908 PetscFunctionReturn(0); 4909 } 4910 4911 #undef __FUNCT__ 4912 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4913 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4914 { 4915 PetscErrorCode ierr; 4916 PetscInt m=A->rmap->n,n=B->cmap->n; 4917 Mat Cmat; 4918 4919 PetscFunctionBegin; 4920 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 4921 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 4922 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4923 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 4924 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 4925 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4926 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4927 *C = Cmat; 4928 PetscFunctionReturn(0); 4929 } 4930 4931 /* ----------------------------------------------------------------*/ 4932 #undef __FUNCT__ 4933 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 4934 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 4935 { 4936 PetscErrorCode ierr; 4937 4938 PetscFunctionBegin; 4939 if (scall == MAT_INITIAL_MATRIX){ 4940 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 4941 } 4942 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 4943 PetscFunctionReturn(0); 4944 } 4945 4946 EXTERN_C_BEGIN 4947 #if defined(PETSC_HAVE_MUMPS) 4948 extern 
PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*); 4949 #endif 4950 #if defined(PETSC_HAVE_PASTIX) 4951 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 4952 #endif 4953 #if defined(PETSC_HAVE_SUPERLU_DIST) 4954 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 4955 #endif 4956 #if defined(PETSC_HAVE_SPOOLES) 4957 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 4958 #endif 4959 EXTERN_C_END 4960 4961 /*MC 4962 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 4963 4964 Options Database Keys: 4965 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 4966 4967 Level: beginner 4968 4969 .seealso: MatCreateMPIAIJ() 4970 M*/ 4971 4972 EXTERN_C_BEGIN 4973 #undef __FUNCT__ 4974 #define __FUNCT__ "MatCreate_MPIAIJ" 4975 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 4976 { 4977 Mat_MPIAIJ *b; 4978 PetscErrorCode ierr; 4979 PetscMPIInt size; 4980 4981 PetscFunctionBegin; 4982 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 4983 4984 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 4985 B->data = (void*)b; 4986 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 4987 B->rmap->bs = 1; 4988 B->assembled = PETSC_FALSE; 4989 B->mapping = 0; 4990 4991 B->insertmode = NOT_SET_VALUES; 4992 b->size = size; 4993 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 4994 4995 /* build cache for off array entries formed */ 4996 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 4997 b->donotstash = PETSC_FALSE; 4998 b->colmap = 0; 4999 b->garray = 0; 5000 b->roworiented = PETSC_TRUE; 5001 5002 /* stuff used for matrix vector multiply */ 5003 b->lvec = PETSC_NULL; 5004 b->Mvctx = PETSC_NULL; 5005 5006 /* stuff for MatGetRow() */ 5007 b->rowindices = 0; 5008 b->rowvalues = 0; 5009 b->getrowactive = PETSC_FALSE; 5010 5011 #if defined(PETSC_HAVE_SPOOLES) 5012 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_spooles_C", 5013 "MatGetFactor_mpiaij_spooles", 5014 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5015 #endif 5016 #if defined(PETSC_HAVE_MUMPS) 5017 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_mumps_C", 5018 "MatGetFactor_mpiaij_mumps", 5019 MatGetFactor_mpiaij_mumps);CHKERRQ(ierr); 5020 #endif 5021 #if defined(PETSC_HAVE_PASTIX) 5022 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_pastix_C", 5023 "MatGetFactor_mpiaij_pastix", 5024 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5025 #endif 5026 #if defined(PETSC_HAVE_SUPERLU_DIST) 5027 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_superlu_dist_C", 5028 "MatGetFactor_mpiaij_superlu_dist", 5029 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5030 #endif 5031 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5032 "MatStoreValues_MPIAIJ", 5033 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5034 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5035 "MatRetrieveValues_MPIAIJ", 5036 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5037 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5038 "MatGetDiagonalBlock_MPIAIJ", 5039 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5040 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5041 "MatIsTranspose_MPIAIJ", 5042 
MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5043 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5044 "MatMPIAIJSetPreallocation_MPIAIJ", 5045 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5046 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5047 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5048 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5049 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5050 "MatDiagonalScaleLocal_MPIAIJ", 5051 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5052 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C", 5053 "MatConvert_MPIAIJ_MPICSRPERM", 5054 MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr); 5055 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C", 5056 "MatConvert_MPIAIJ_MPICRL", 5057 MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr); 5058 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5059 "MatMatMult_MPIDense_MPIAIJ", 5060 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5061 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5062 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5063 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5064 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5065 "MatMatMultNumeric_MPIDense_MPIAIJ", 5066 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5067 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5068 PetscFunctionReturn(0); 5069 } 5070 EXTERN_C_END 5071 5072 #undef __FUNCT__ 5073 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5074 /*@ 5075 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5076 and "off-diagonal" part of the matrix in CSR format. 5077 5078 Collective on MPI_Comm 5079 5080 Input Parameters: 5081 + comm - MPI communicator 5082 . m - number of local rows (Cannot be PETSC_DECIDE) 5083 . n - This value should be the same as the local size used in creating the 5084 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5085 calculated if N is given) For square matrices n is almost always m. 5086 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5087 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5088 . i - row indices for "diagonal" portion of matrix 5089 . j - column indices 5090 . a - matrix values 5091 . oi - row indices for "off-diagonal" portion of matrix 5092 . oj - column indices 5093 - oa - matrix values 5094 5095 Output Parameter: 5096 . mat - the matrix 5097 5098 Level: advanced 5099 5100 Notes: 5101 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 
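
   Because the arrays are passed unchanged to MatCreateSeqAIJWithArrays() below (an m by n matrix for the
   "diagonal" block and an m by N matrix for the "off-diagonal" block), the j indices are local column indices
   within the diagonal block while the oj indices are global column indices.

   Illustrative sketch (an assumed 2-process layout, used only for illustration): for the global matrix
       1 2 0 0
       0 3 4 0
       0 0 5 6
       7 0 0 8
   distributed two rows per process with a 2-column diagonal block, process 1 (rows 2-3, columns 2-3) would pass
       i  = {0,2,3}   j  = {0,1,1}   a  = {5,6,8}    (diagonal block, local column indices for global columns 2,3,3)
       oi = {0,0,1}   oj = {0}       oa = {7}        (off-diagonal block, global column indices)
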
5102 5103 The i and j indices are 0 based 5104 5105 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5106 5107 5108 .keywords: matrix, aij, compressed row, sparse, parallel 5109 5110 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5111 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5112 @*/ 5113 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5114 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5115 { 5116 PetscErrorCode ierr; 5117 Mat_MPIAIJ *maij; 5118 5119 PetscFunctionBegin; 5120 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5121 if (i[0]) { 5122 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5123 } 5124 if (oi[0]) { 5125 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5126 } 5127 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5128 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5129 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5130 maij = (Mat_MPIAIJ*) (*mat)->data; 5131 maij->donotstash = PETSC_TRUE; 5132 (*mat)->preallocated = PETSC_TRUE; 5133 5134 ierr = PetscMapSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5135 ierr = PetscMapSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5136 ierr = PetscMapSetUp((*mat)->rmap);CHKERRQ(ierr); 5137 ierr = PetscMapSetUp((*mat)->cmap);CHKERRQ(ierr); 5138 5139 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5140 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5141 5142 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5143 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5144 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5145 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5146 5147 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5148 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5149 PetscFunctionReturn(0); 5150 } 5151 5152 /* 5153 Special version for direct calls from Fortran 5154 */ 5155 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5156 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5157 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5158 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5159 #endif 5160 5161 /* Change these macros so can be used in void function */ 5162 #undef CHKERRQ 5163 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5164 #undef SETERRQ2 5165 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5166 #undef SETERRQ 5167 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5168 5169 EXTERN_C_BEGIN 5170 #undef __FUNCT__ 5171 #define __FUNCT__ "matsetvaluesmpiaij_" 5172 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5173 { 5174 Mat mat = *mmat; 5175 PetscInt m = *mm, n = *mn; 5176 InsertMode addv = *maddv; 5177 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5178 PetscScalar value; 5179 PetscErrorCode ierr; 5180 5181 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5182 if (mat->insertmode == NOT_SET_VALUES) { 5183 mat->insertmode = addv; 5184 } 5185 #if defined(PETSC_USE_DEBUG) 5186 else if (mat->insertmode != addv) { 5187 
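    /* in debug builds, flag attempts to mix ADD_VALUES and INSERT_VALUES without an
       intervening assembly (assembly resets mat->insertmode to NOT_SET_VALUES) */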
SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5188 } 5189 #endif 5190 { 5191 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5192 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5193 PetscTruth roworiented = aij->roworiented; 5194 5195 /* Some Variables required in the macro */ 5196 Mat A = aij->A; 5197 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5198 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5199 MatScalar *aa = a->a; 5200 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5201 Mat B = aij->B; 5202 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5203 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5204 MatScalar *ba = b->a; 5205 5206 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5207 PetscInt nonew = a->nonew; 5208 MatScalar *ap1,*ap2; 5209 5210 PetscFunctionBegin; 5211 for (i=0; i<m; i++) { 5212 if (im[i] < 0) continue; 5213 #if defined(PETSC_USE_DEBUG) 5214 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5215 #endif 5216 if (im[i] >= rstart && im[i] < rend) { 5217 row = im[i] - rstart; 5218 lastcol1 = -1; 5219 rp1 = aj + ai[row]; 5220 ap1 = aa + ai[row]; 5221 rmax1 = aimax[row]; 5222 nrow1 = ailen[row]; 5223 low1 = 0; 5224 high1 = nrow1; 5225 lastcol2 = -1; 5226 rp2 = bj + bi[row]; 5227 ap2 = ba + bi[row]; 5228 rmax2 = bimax[row]; 5229 nrow2 = bilen[row]; 5230 low2 = 0; 5231 high2 = nrow2; 5232 5233 for (j=0; j<n; j++) { 5234 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5235 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5236 if (in[j] >= cstart && in[j] < cend){ 5237 col = in[j] - cstart; 5238 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5239 } else if (in[j] < 0) continue; 5240 #if defined(PETSC_USE_DEBUG) 5241 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 5242 #endif 5243 else { 5244 if (mat->was_assembled) { 5245 if (!aij->colmap) { 5246 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5247 } 5248 #if defined (PETSC_USE_CTABLE) 5249 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5250 col--; 5251 #else 5252 col = aij->colmap[in[j]] - 1; 5253 #endif 5254 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5255 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5256 col = in[j]; 5257 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5258 B = aij->B; 5259 b = (Mat_SeqAIJ*)B->data; 5260 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5261 rp2 = bj + bi[row]; 5262 ap2 = ba + bi[row]; 5263 rmax2 = bimax[row]; 5264 nrow2 = bilen[row]; 5265 low2 = 0; 5266 high2 = nrow2; 5267 bm = aij->B->rmap->n; 5268 ba = b->a; 5269 } 5270 } else col = in[j]; 5271 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5272 } 5273 } 5274 } else { 5275 if (!aij->donotstash) { 5276 if (roworiented) { 5277 if (ignorezeroentries && v[i*n] == 0.0) continue; 5278 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 5279 } else { 5280 if (ignorezeroentries && v[i] == 0.0) continue; 5281 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 5282 } 5283 } 5284 } 5285 }} 5286 PetscFunctionReturnVoid(); 5287 } 5288 EXTERN_C_END 5289 5290
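
/*
   Minimal usage sketch (illustration only; the sizes and values below are assumed, and error checking with
   ierr/CHKERRQ() is omitted): assembling a tridiagonal MATMPIAIJ matrix through the public interface whose
   type-specific pieces are implemented above. MatSetType() dispatches to MatCreate_MPIAIJ(), and
   MatMPIAIJSetPreallocation() reaches MatMPIAIJSetPreallocation_MPIAIJ() through the composed function.

       Mat         T;
       PetscInt    r,rstart,rend,N = 100;
       PetscScalar v[3] = {-1.0,2.0,-1.0};

       MatCreate(PETSC_COMM_WORLD,&T);
       MatSetSizes(T,PETSC_DECIDE,PETSC_DECIDE,N,N);
       MatSetType(T,MATMPIAIJ);
       MatMPIAIJSetPreallocation(T,3,PETSC_NULL,1,PETSC_NULL);
       MatGetOwnershipRange(T,&rstart,&rend);
       for (r=rstart; r<rend; r++) {
         PetscInt    ncols = 3,cols[3];
         PetscScalar *val  = v;
         cols[0] = r-1; cols[1] = r; cols[2] = r+1;
         if (r == 0)   { ncols = 2; cols[0] = r; cols[1] = r+1; val = v+1; }
         if (r == N-1) { ncols = 2; }
         MatSetValues(T,1,&r,ncols,cols,val,INSERT_VALUES);
       }
       MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);
       MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);
       MatDestroy(T);
*/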