#define PETSCMAT_DLL

#include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "../src/inline/spops.h"

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscTruth     aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
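
/*
   Usage sketch (added for illustration, not part of the original source; the
   names gA, dA and mlocal below are hypothetical):

     Mat      gA,dA;      // MATSEQAIJ matrix, examined only on process 0 here
     PetscInt mlocal;     // number of rows this process is to own
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gA,mlocal,MAT_INITIAL_MATRIX,&dA);CHKERRQ(ierr);

   A later call with MAT_REUSE_MATRIX and the same dA moves only the numerical
   values from process 0, keeping the existing nonzero pattern.
*/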

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each
  processor has an order N integer array) but it is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}


#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
  if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
  lastcol1 = col;\
  while (high1-low1 > 5) { \
    t = (low1+high1)/2; \
    if (rp1[t] > col) high1 = t; \
    else              low1  = t; \
  } \
  for (_i=low1; _i<high1; _i++) { \
    if (rp1[_i] > col) break; \
    if (rp1[_i] == col) { \
      if (addv == ADD_VALUES) ap1[_i] += value; \
      else                    ap1[_i] = value; \
      goto a_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
  if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
  if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
  N = nrow1++ - 1; a->nz++; high1++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp1[ii+1] = rp1[ii]; \
    ap1[ii+1] = ap1[ii]; \
  } \
  rp1[_i] = col; \
  ap1[_i] = value; \
  a_noinsert: ; \
  ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
  if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
  lastcol2 = col;\
  while (high2-low2 > 5) { \
    t = (low2+high2)/2; \
    if (rp2[t] > col) high2 = t; \
    else              low2  = t; \
  } \
  for (_i=low2; _i<high2; _i++) { \
    if (rp2[_i] > col) break; \
    if (rp2[_i] == col) { \
      if (addv == ADD_VALUES) ap2[_i] += value; \
      else                    ap2[_i] = value; \
      goto b_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
  if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
  if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
  N = nrow2++ - 1; b->nz++; high2++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp2[ii+1] = rp2[ii]; \
    ap2[ii+1] = ap2[ii]; \
  } \
  rp2[_i] = col; \
  ap2[_i] = value; \
  b_noinsert: ; \
  bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscTruth ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew = a->nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ*)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */

  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap->range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

       Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  }
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
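
/*
   Note (added comment, not in the original source): MatMult_MPIAIJ() and
   MatMultAdd_MPIAIJ() above compute

       y = A_d*x_local + B_o*x_ghost

   where A_d (a->A) holds the columns owned by this process and B_o (a->B)
   holds the off-process columns in compressed (garray) numbering.  The
   VecScatterBegin()/VecScatterEnd() pair gathers the needed ghost values of
   x into a->lvec, and the product with A_d is performed between the two calls
   so that communication overlaps with local computation.
*/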

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*)Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the largest any processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    if (mat->rmap->N > 1024) {
      PetscTruth flg = PETSC_FALSE;

      ierr = PetscOptionsGetTruth(((PetscObject)mat)->prefix,"-mat_ascii_output_large",&flg,PETSC_NULL);CHKERRQ(ierr);
      if (!flg) {
        SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
      }
    }

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}
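
/*
   Note (added comment, not in the original source): the relaxation routine
   below applies SOR to the local diagonal block only.  Each outer iteration
   gathers the current ghost values of xx into mat->lvec and forms the updated
   local right-hand side

       bb1 = bb - B_o*x_ghost

   then runs the sequential SOR kernel on the diagonal block A_d (mat->A) with
   bb1; in effect a block Jacobi iteration that uses SOR as the local solver.
*/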

#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;

  PetscFunctionBegin;
  if (its > 1) {ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);}

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    ierr = (*mat->A->ops->relax)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    ierr = VecWAXPY(bb1,-1.0,bb1,bb);CHKERRQ(ierr);
    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(xx1);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       first,local_size,nrows;
  const PetscInt *rows;
  PetscMPIInt    size;
  IS             crowp,growp,irowp,lrowp,lcolp,icolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
  }
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
  ierr = ISSetPermutation(growp);CHKERRQ(ierr);
  if (pcomm!=comm) {
    ierr = ISDestroy(crowp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  /* get the local target indices */
  ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISDestroy(irowp);CHKERRQ(ierr);
  /* the column permutation is so much easier;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
  if (size==1) {
    lcolp = colp;
  } else {
    ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
  ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
  if (size>1) {
    ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISDestroy(lcolp);CHKERRQ(ierr);
  }
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  /* clean up */
  ierr = ISDestroy(lrowp);CHKERRQ(ierr);
  ierr = ISDestroy(icolp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /* allocate enough space to hold information from the longest row. */
1455 */ 1456 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1457 PetscInt max = 1,tmp; 1458 for (i=0; i<matin->rmap->n; i++) { 1459 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1460 if (max < tmp) { max = tmp; } 1461 } 1462 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1463 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1464 } 1465 1466 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1467 lrow = row - rstart; 1468 1469 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1470 if (!v) {pvA = 0; pvB = 0;} 1471 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1472 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1473 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1474 nztot = nzA + nzB; 1475 1476 cmap = mat->garray; 1477 if (v || idx) { 1478 if (nztot) { 1479 /* Sort by increasing column numbers, assuming A and B already sorted */ 1480 PetscInt imark = -1; 1481 if (v) { 1482 *v = v_p = mat->rowvalues; 1483 for (i=0; i<nzB; i++) { 1484 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1485 else break; 1486 } 1487 imark = i; 1488 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1489 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1490 } 1491 if (idx) { 1492 *idx = idx_p = mat->rowindices; 1493 if (imark > -1) { 1494 for (i=0; i<imark; i++) { 1495 idx_p[i] = cmap[cworkB[i]]; 1496 } 1497 } else { 1498 for (i=0; i<nzB; i++) { 1499 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1500 else break; 1501 } 1502 imark = i; 1503 } 1504 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1505 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1506 } 1507 } else { 1508 if (idx) *idx = 0; 1509 if (v) *v = 0; 1510 } 1511 } 1512 *nz = nztot; 1513 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1514 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1515 PetscFunctionReturn(0); 1516 } 1517 1518 #undef __FUNCT__ 1519 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1520 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1521 { 1522 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1523 1524 PetscFunctionBegin; 1525 if (!aij->getrowactive) { 1526 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1527 } 1528 aij->getrowactive = PETSC_FALSE; 1529 PetscFunctionReturn(0); 1530 } 1531 1532 #undef __FUNCT__ 1533 #define __FUNCT__ "MatNorm_MPIAIJ" 1534 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1535 { 1536 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1537 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1538 PetscErrorCode ierr; 1539 PetscInt i,j,cstart = mat->cmap->rstart; 1540 PetscReal sum = 0.0; 1541 MatScalar *v; 1542 1543 PetscFunctionBegin; 1544 if (aij->size == 1) { 1545 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1546 } else { 1547 if (type == NORM_FROBENIUS) { 1548 v = amat->a; 1549 for (i=0; i<amat->nz; i++) { 1550 #if defined(PETSC_USE_COMPLEX) 1551 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1552 #else 1553 sum += (*v)*(*v); v++; 1554 #endif 1555 } 1556 v = bmat->a; 1557 for (i=0; i<bmat->nz; i++) { 1558 #if defined(PETSC_USE_COMPLEX) 1559 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1560 #else 1561 sum += (*v)*(*v); v++; 1562 #endif 1563 } 1564 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1565 *norm = 
sqrt(*norm); 1566 } else if (type == NORM_1) { /* max column norm */ 1567 PetscReal *tmp,*tmp2; 1568 PetscInt *jj,*garray = aij->garray; 1569 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1570 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1571 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1572 *norm = 0.0; 1573 v = amat->a; jj = amat->j; 1574 for (j=0; j<amat->nz; j++) { 1575 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1576 } 1577 v = bmat->a; jj = bmat->j; 1578 for (j=0; j<bmat->nz; j++) { 1579 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1580 } 1581 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1582 for (j=0; j<mat->cmap->N; j++) { 1583 if (tmp2[j] > *norm) *norm = tmp2[j]; 1584 } 1585 ierr = PetscFree(tmp);CHKERRQ(ierr); 1586 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1587 } else if (type == NORM_INFINITY) { /* max row norm */ 1588 PetscReal ntemp = 0.0; 1589 for (j=0; j<aij->A->rmap->n; j++) { 1590 v = amat->a + amat->i[j]; 1591 sum = 0.0; 1592 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1593 sum += PetscAbsScalar(*v); v++; 1594 } 1595 v = bmat->a + bmat->i[j]; 1596 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1597 sum += PetscAbsScalar(*v); v++; 1598 } 1599 if (sum > ntemp) ntemp = sum; 1600 } 1601 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1602 } else { 1603 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1604 } 1605 } 1606 PetscFunctionReturn(0); 1607 } 1608 1609 #undef __FUNCT__ 1610 #define __FUNCT__ "MatTranspose_MPIAIJ" 1611 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1612 { 1613 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1614 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1615 PetscErrorCode ierr; 1616 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1617 PetscInt cstart=A->cmap->rstart,ncol; 1618 Mat B; 1619 MatScalar *array; 1620 1621 PetscFunctionBegin; 1622 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1623 1624 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1625 ai = Aloc->i; aj = Aloc->j; 1626 bi = Bloc->i; bj = Bloc->j; 1627 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1628 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1629 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1630 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1631 for (i=0; i<ai[ma]; i++){ 1632 d_nnz[aj[i]] ++; 1633 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1634 } 1635 1636 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1637 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1638 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1639 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1640 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1641 } else { 1642 B = *matout; 1643 } 1644 1645 /* copy over the A part */ 1646 array = Aloc->a; 1647 row = A->rmap->rstart; 1648 for (i=0; i<ma; i++) { 1649 ncol = ai[i+1]-ai[i]; 1650 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1651 row++; array += ncol; aj += ncol; 1652 } 1653 aj = Aloc->j; 1654 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1655 1656 /* copy over the B part */ 1657 ierr = 
PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1658 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1659 array = Bloc->a; 1660 row = A->rmap->rstart; 1661 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1662 cols_tmp = cols; 1663 for (i=0; i<mb; i++) { 1664 ncol = bi[i+1]-bi[i]; 1665 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1666 row++; array += ncol; cols_tmp += ncol; 1667 } 1668 ierr = PetscFree(cols);CHKERRQ(ierr); 1669 1670 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1671 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1672 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1673 *matout = B; 1674 } else { 1675 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1676 } 1677 PetscFunctionReturn(0); 1678 } 1679 1680 #undef __FUNCT__ 1681 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1682 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1683 { 1684 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1685 Mat a = aij->A,b = aij->B; 1686 PetscErrorCode ierr; 1687 PetscInt s1,s2,s3; 1688 1689 PetscFunctionBegin; 1690 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1691 if (rr) { 1692 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1693 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1694 /* Overlap communication with computation. */ 1695 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1696 } 1697 if (ll) { 1698 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1699 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1700 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1701 } 1702 /* scale the diagonal block */ 1703 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1704 1705 if (rr) { 1706 /* Do a scatter end and then right scale the off-diagonal block */ 1707 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1708 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1709 } 1710 1711 PetscFunctionReturn(0); 1712 } 1713 1714 #undef __FUNCT__ 1715 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1716 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1717 { 1718 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1719 PetscErrorCode ierr; 1720 1721 PetscFunctionBegin; 1722 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1723 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1724 PetscFunctionReturn(0); 1725 } 1726 #undef __FUNCT__ 1727 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1728 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1729 { 1730 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1731 PetscErrorCode ierr; 1732 1733 PetscFunctionBegin; 1734 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1735 PetscFunctionReturn(0); 1736 } 1737 1738 #undef __FUNCT__ 1739 #define __FUNCT__ "MatEqual_MPIAIJ" 1740 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1741 { 1742 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1743 Mat a,b,c,d; 1744 PetscTruth flg; 1745 PetscErrorCode ierr; 1746 1747 PetscFunctionBegin; 1748 a = matA->A; b = matA->B; 1749 c = matB->A; d = matB->B; 1750 1751 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1752 if (flg) { 1753 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1754 } 1755 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1756 PetscFunctionReturn(0); 1757 } 1758 1759 #undef __FUNCT__ 1760 #define __FUNCT__ "MatCopy_MPIAIJ" 1761 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1762 
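/*
   Copies the values of A into B: with SAME_NONZERO_PATTERN (and matching copy
   implementations) the diagonal block a->A and the off-diagonal block a->B are
   copied directly; otherwise the generic MatCopy_Basic() path is taken.
*/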
{ 1763 PetscErrorCode ierr; 1764 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1765 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1766 1767 PetscFunctionBegin; 1768 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1769 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1770 /* because of the column compression in the off-processor part of the matrix a->B, 1771 the number of columns in a->B and b->B may be different, hence we cannot call 1772 the MatCopy() directly on the two parts. If need be, we can provide a more 1773 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1774 then copying the submatrices */ 1775 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1776 } else { 1777 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1778 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1779 } 1780 PetscFunctionReturn(0); 1781 } 1782 1783 #undef __FUNCT__ 1784 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1785 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1786 { 1787 PetscErrorCode ierr; 1788 1789 PetscFunctionBegin; 1790 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1791 PetscFunctionReturn(0); 1792 } 1793 1794 #include "petscblaslapack.h" 1795 #undef __FUNCT__ 1796 #define __FUNCT__ "MatAXPY_MPIAIJ" 1797 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1798 { 1799 PetscErrorCode ierr; 1800 PetscInt i; 1801 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1802 PetscBLASInt bnz,one=1; 1803 Mat_SeqAIJ *x,*y; 1804 1805 PetscFunctionBegin; 1806 if (str == SAME_NONZERO_PATTERN) { 1807 PetscScalar alpha = a; 1808 x = (Mat_SeqAIJ *)xx->A->data; 1809 y = (Mat_SeqAIJ *)yy->A->data; 1810 bnz = PetscBLASIntCast(x->nz); 1811 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1812 x = (Mat_SeqAIJ *)xx->B->data; 1813 y = (Mat_SeqAIJ *)yy->B->data; 1814 bnz = PetscBLASIntCast(x->nz); 1815 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1816 } else if (str == SUBSET_NONZERO_PATTERN) { 1817 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1818 1819 x = (Mat_SeqAIJ *)xx->B->data; 1820 y = (Mat_SeqAIJ *)yy->B->data; 1821 if (y->xtoy && y->XtoY != xx->B) { 1822 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1823 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1824 } 1825 if (!y->xtoy) { /* get xtoy */ 1826 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1827 y->XtoY = xx->B; 1828 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1829 } 1830 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1831 } else { 1832 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1833 } 1834 PetscFunctionReturn(0); 1835 } 1836 1837 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1838 1839 #undef __FUNCT__ 1840 #define __FUNCT__ "MatConjugate_MPIAIJ" 1841 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1842 { 1843 #if defined(PETSC_USE_COMPLEX) 1844 PetscErrorCode ierr; 1845 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1846 1847 PetscFunctionBegin; 1848 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1849 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1850 #else 1851 PetscFunctionBegin; 1852 #endif 1853 PetscFunctionReturn(0); 1854 } 1855 1856 #undef __FUNCT__ 1857 #define __FUNCT__ "MatRealPart_MPIAIJ" 1858 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1859 { 1860 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1861 PetscErrorCode ierr; 1862 1863 PetscFunctionBegin; 1864 ierr = 
MatRealPart(a->A);CHKERRQ(ierr); 1865 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1866 PetscFunctionReturn(0); 1867 } 1868 1869 #undef __FUNCT__ 1870 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1871 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1872 { 1873 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1874 PetscErrorCode ierr; 1875 1876 PetscFunctionBegin; 1877 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1878 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1879 PetscFunctionReturn(0); 1880 } 1881 1882 #ifdef PETSC_HAVE_PBGL 1883 1884 #include <boost/parallel/mpi/bsp_process_group.hpp> 1885 #include <boost/graph/distributed/ilu_default_graph.hpp> 1886 #include <boost/graph/distributed/ilu_0_block.hpp> 1887 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1888 #include <boost/graph/distributed/petsc/interface.hpp> 1889 #include <boost/multi_array.hpp> 1890 #include <boost/parallel/distributed_property_map.hpp> 1891 1892 #undef __FUNCT__ 1893 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1894 /* 1895 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1896 */ 1897 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1898 { 1899 namespace petsc = boost::distributed::petsc; 1900 1901 namespace graph_dist = boost::graph::distributed; 1902 using boost::graph::distributed::ilu_default::process_group_type; 1903 using boost::graph::ilu_permuted; 1904 1905 PetscTruth row_identity, col_identity; 1906 PetscContainer c; 1907 PetscInt m, n, M, N; 1908 PetscErrorCode ierr; 1909 1910 PetscFunctionBegin; 1911 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1912 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1913 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1914 if (!row_identity || !col_identity) { 1915 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1916 } 1917 1918 process_group_type pg; 1919 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1920 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1921 lgraph_type& level_graph = *lgraph_p; 1922 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1923 1924 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1925 ilu_permuted(level_graph); 1926 1927 /* put together the new matrix */ 1928 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 1929 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1930 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1931 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 1932 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 1933 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1934 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1935 1936 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr); 1937 ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr); 1938 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr); 1939 PetscFunctionReturn(0); 1940 } 1941 1942 #undef __FUNCT__ 1943 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1944 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 1945 { 1946 PetscFunctionBegin; 1947 PetscFunctionReturn(0); 1948 } 1949 1950 #undef __FUNCT__ 1951 #define __FUNCT__ "MatSolve_MPIAIJ" 1952 /* 1953 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1954 */ 1955 PetscErrorCode
MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1956 { 1957 namespace graph_dist = boost::graph::distributed; 1958 1959 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1960 lgraph_type* lgraph_p; 1961 PetscContainer c; 1962 PetscErrorCode ierr; 1963 1964 PetscFunctionBegin; 1965 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1966 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1967 ierr = VecCopy(b, x);CHKERRQ(ierr); 1968 1969 PetscScalar* array_x; 1970 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1971 PetscInt sx; 1972 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 1973 1974 PetscScalar* array_b; 1975 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 1976 PetscInt sb; 1977 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 1978 1979 lgraph_type& level_graph = *lgraph_p; 1980 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1981 1982 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 1983 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 1984 ref_x(array_x, boost::extents[num_vertices(graph)]); 1985 1986 typedef boost::iterator_property_map<array_ref_type::iterator, 1987 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 1988 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 1989 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 1990 1991 ilu_set_solve(*lgraph_p, vector_b, vector_x); 1992 1993 PetscFunctionReturn(0); 1994 } 1995 #endif 1996 1997 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 1998 PetscInt nzlocal,nsends,nrecvs; 1999 PetscMPIInt *send_rank; 2000 PetscInt *sbuf_nz,*sbuf_j,**rbuf_j; 2001 PetscScalar *sbuf_a,**rbuf_a; 2002 PetscErrorCode (*MatDestroy)(Mat); 2003 } Mat_Redundant; 2004 2005 #undef __FUNCT__ 2006 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2007 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2008 { 2009 PetscErrorCode ierr; 2010 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2011 PetscInt i; 2012 2013 PetscFunctionBegin; 2014 ierr = PetscFree(redund->send_rank);CHKERRQ(ierr); 2015 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2016 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2017 for (i=0; i<redund->nrecvs; i++){ 2018 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2019 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2020 } 2021 ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2022 ierr = PetscFree(redund);CHKERRQ(ierr); 2023 PetscFunctionReturn(0); 2024 } 2025 2026 #undef __FUNCT__ 2027 #define __FUNCT__ "MatDestroy_MatRedundant" 2028 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2029 { 2030 PetscErrorCode ierr; 2031 PetscContainer container; 2032 Mat_Redundant *redund=PETSC_NULL; 2033 2034 PetscFunctionBegin; 2035 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2036 if (container) { 2037 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2038 } else { 2039 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2040 } 2041 A->ops->destroy = redund->MatDestroy; 2042 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2043 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2044 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 2045 PetscFunctionReturn(0); 2046 } 2047 2048 #undef __FUNCT__ 2049 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2050 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt 
nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2051 { 2052 PetscMPIInt rank,size; 2053 MPI_Comm comm=((PetscObject)mat)->comm; 2054 PetscErrorCode ierr; 2055 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2056 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2057 PetscInt *rowrange=mat->rmap->range; 2058 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2059 Mat A=aij->A,B=aij->B,C=*matredundant; 2060 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2061 PetscScalar *sbuf_a; 2062 PetscInt nzlocal=a->nz+b->nz; 2063 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2064 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2065 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2066 MatScalar *aworkA,*aworkB; 2067 PetscScalar *vals; 2068 PetscMPIInt tag1,tag2,tag3,imdex; 2069 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2070 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2071 MPI_Status recv_status,*send_status; 2072 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2073 PetscInt **rbuf_j=PETSC_NULL; 2074 PetscScalar **rbuf_a=PETSC_NULL; 2075 Mat_Redundant *redund=PETSC_NULL; 2076 PetscContainer container; 2077 2078 PetscFunctionBegin; 2079 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2080 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2081 2082 if (reuse == MAT_REUSE_MATRIX) { 2083 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2084 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2085 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2086 if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2087 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2088 if (container) { 2089 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2090 } else { 2091 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2092 } 2093 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2094 2095 nsends = redund->nsends; 2096 nrecvs = redund->nrecvs; 2097 send_rank = redund->send_rank; recv_rank = send_rank + size; 2098 sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends; 2099 sbuf_j = redund->sbuf_j; 2100 sbuf_a = redund->sbuf_a; 2101 rbuf_j = redund->rbuf_j; 2102 rbuf_a = redund->rbuf_a; 2103 } 2104 2105 if (reuse == MAT_INITIAL_MATRIX){ 2106 PetscMPIInt subrank,subsize; 2107 PetscInt nleftover,np_subcomm; 2108 /* get the destination processors' id send_rank, nsends and nrecvs */ 2109 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2110 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2111 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 2112 recv_rank = send_rank + size; 2113 np_subcomm = size/nsubcomm; 2114 nleftover = size - nsubcomm*np_subcomm; 2115 nsends = 0; nrecvs = 0; 2116 for (i=0; i<size; i++){ /* i=rank*/ 2117 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2118 send_rank[nsends] = i; nsends++; 2119 recv_rank[nrecvs++] = i; 2120 } 2121 } 2122 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2123 i = size-nleftover-1; 2124 j = 0; 2125 while (j < nsubcomm - nleftover){ 2126 send_rank[nsends++] = i; 2127 i--; j++; 2128 } 2129 } 2130 2131 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2132 for (i=0; i<nleftover; i++){ 2133 recv_rank[nrecvs++] = size-nleftover+i; 2134 } 2135 } 2136 2137 /* allocate sbuf_j, sbuf_a */ 2138 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2139 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2140 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2141 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2142 2143 /* copy mat's local entries into the buffers */ 2144 if (reuse == MAT_INITIAL_MATRIX){ 2145 rownz_max = 0; 2146 rptr = sbuf_j; 2147 cols = sbuf_j + rend-rstart + 1; 2148 vals = sbuf_a; 2149 rptr[0] = 0; 2150 for (i=0; i<rend-rstart; i++){ 2151 row = i + rstart; 2152 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2153 ncols = nzA + nzB; 2154 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2155 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2156 /* load the column indices for this row into cols */ 2157 lwrite = 0; 2158 for (l=0; l<nzB; l++) { 2159 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2160 vals[lwrite] = aworkB[l]; 2161 cols[lwrite++] = ctmp; 2162 } 2163 } 2164 for (l=0; l<nzA; l++){ 2165 vals[lwrite] = aworkA[l]; 2166 cols[lwrite++] = cstart + cworkA[l]; 2167 } 2168 for (l=0; l<nzB; l++) { 2169 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2170 vals[lwrite] = aworkB[l]; 2171 cols[lwrite++] = ctmp; 2172 } 2173 } 2174 vals += ncols; 2175 cols += ncols; 2176 rptr[i+1] = rptr[i] + ncols; 2177 if (rownz_max < ncols) rownz_max = ncols; 2178 } 2179 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2180 } else { /* only copy matrix values into sbuf_a */ 2181 rptr = sbuf_j; 2182 vals = sbuf_a; 2183 rptr[0] = 0; 2184 for (i=0; i<rend-rstart; i++){ 2185 row = i + rstart; 2186 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2187 ncols = nzA + nzB; 2188 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2189 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2190 lwrite = 0; 2191 for (l=0; l<nzB; l++) { 2192 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2193 } 2194 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2195 for (l=0; l<nzB; l++) { 
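/* append the off-diagonal entries whose global column index lies at or beyond cend,
         matching the column ordering built in the MAT_INITIAL_MATRIX pass above */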
2196 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2197 } 2198 vals += ncols; 2199 rptr[i+1] = rptr[i] + ncols; 2200 } 2201 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2202 2203 /* send nzlocal to others, and recv other's nzlocal */ 2204 /*--------------------------------------------------*/ 2205 if (reuse == MAT_INITIAL_MATRIX){ 2206 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2207 s_waits2 = s_waits3 + nsends; 2208 s_waits1 = s_waits2 + nsends; 2209 r_waits1 = s_waits1 + nsends; 2210 r_waits2 = r_waits1 + nrecvs; 2211 r_waits3 = r_waits2 + nrecvs; 2212 } else { 2213 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2214 r_waits3 = s_waits3 + nsends; 2215 } 2216 2217 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2218 if (reuse == MAT_INITIAL_MATRIX){ 2219 /* get new tags to keep the communication clean */ 2220 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2221 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2222 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2223 rbuf_nz = sbuf_nz + nsends; 2224 2225 /* post receives of other's nzlocal */ 2226 for (i=0; i<nrecvs; i++){ 2227 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2228 } 2229 /* send nzlocal to others */ 2230 for (i=0; i<nsends; i++){ 2231 sbuf_nz[i] = nzlocal; 2232 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2233 } 2234 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2235 count = nrecvs; 2236 while (count) { 2237 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2238 recv_rank[imdex] = recv_status.MPI_SOURCE; 2239 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2240 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2241 2242 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2243 rbuf_nz[imdex] += i + 2; 2244 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2245 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2246 count--; 2247 } 2248 /* wait on sends of nzlocal */ 2249 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2250 /* send mat->i,j to others, and recv from other's */ 2251 /*------------------------------------------------*/ 2252 for (i=0; i<nsends; i++){ 2253 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2254 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2255 } 2256 /* wait on receives of mat->i,j */ 2257 /*------------------------------*/ 2258 count = nrecvs; 2259 while (count) { 2260 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2261 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2262 count--; 2263 } 2264 /* wait on sends of mat->i,j */ 2265 /*---------------------------*/ 2266 if (nsends) { 2267 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2268 } 2269 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2270 2271 /* post receives, send and receive mat->a */ 2272 /*----------------------------------------*/ 
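/* Receives for the remote value buffers are posted before the matching sends so the
     nonblocking exchange cannot deadlock; MPI_Waitany() then drains the receives in
     completion order, checking that each source rank agrees with the recv_rank[] table. */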
2273 for (imdex=0; imdex<nrecvs; imdex++) { 2274 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2275 } 2276 for (i=0; i<nsends; i++){ 2277 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2278 } 2279 count = nrecvs; 2280 while (count) { 2281 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2282 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2283 count--; 2284 } 2285 if (nsends) { 2286 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2287 } 2288 2289 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2290 2291 /* create redundant matrix */ 2292 /*-------------------------*/ 2293 if (reuse == MAT_INITIAL_MATRIX){ 2294 /* compute rownz_max for preallocation */ 2295 for (imdex=0; imdex<nrecvs; imdex++){ 2296 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2297 rptr = rbuf_j[imdex]; 2298 for (i=0; i<j; i++){ 2299 ncols = rptr[i+1] - rptr[i]; 2300 if (rownz_max < ncols) rownz_max = ncols; 2301 } 2302 } 2303 2304 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2305 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2306 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2307 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2308 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2309 } else { 2310 C = *matredundant; 2311 } 2312 2313 /* insert local matrix entries */ 2314 rptr = sbuf_j; 2315 cols = sbuf_j + rend-rstart + 1; 2316 vals = sbuf_a; 2317 for (i=0; i<rend-rstart; i++){ 2318 row = i + rstart; 2319 ncols = rptr[i+1] - rptr[i]; 2320 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2321 vals += ncols; 2322 cols += ncols; 2323 } 2324 /* insert received matrix entries */ 2325 for (imdex=0; imdex<nrecvs; imdex++){ 2326 rstart = rowrange[recv_rank[imdex]]; 2327 rend = rowrange[recv_rank[imdex]+1]; 2328 rptr = rbuf_j[imdex]; 2329 cols = rbuf_j[imdex] + rend-rstart + 1; 2330 vals = rbuf_a[imdex]; 2331 for (i=0; i<rend-rstart; i++){ 2332 row = i + rstart; 2333 ncols = rptr[i+1] - rptr[i]; 2334 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2335 vals += ncols; 2336 cols += ncols; 2337 } 2338 } 2339 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2340 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2341 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2342 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2343 if (reuse == MAT_INITIAL_MATRIX){ 2344 PetscContainer container; 2345 *matredundant = C; 2346 /* create a supporting struct and attach it to C for reuse */ 2347 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2348 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2349 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2350 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2351 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2352 2353 redund->nzlocal = nzlocal; 2354 redund->nsends = nsends; 2355 redund->nrecvs = nrecvs; 2356 redund->send_rank = send_rank; 2357 redund->sbuf_nz = sbuf_nz; 2358 redund->sbuf_j = sbuf_j; 2359 redund->sbuf_a = sbuf_a; 2360 redund->rbuf_j = 
rbuf_j; 2361 redund->rbuf_a = rbuf_a; 2362 2363 redund->MatDestroy = C->ops->destroy; 2364 C->ops->destroy = MatDestroy_MatRedundant; 2365 } 2366 PetscFunctionReturn(0); 2367 } 2368 2369 #undef __FUNCT__ 2370 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2371 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2372 { 2373 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2374 PetscErrorCode ierr; 2375 PetscInt i,*idxb = 0; 2376 PetscScalar *va,*vb; 2377 Vec vtmp; 2378 2379 PetscFunctionBegin; 2380 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2381 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2382 if (idx) { 2383 for (i=0; i<A->rmap->n; i++) { 2384 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2385 } 2386 } 2387 2388 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2389 if (idx) { 2390 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2391 } 2392 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2393 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2394 2395 for (i=0; i<A->rmap->n; i++){ 2396 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2397 va[i] = vb[i]; 2398 if (idx) idx[i] = a->garray[idxb[i]]; 2399 } 2400 } 2401 2402 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2403 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2404 if (idxb) { 2405 ierr = PetscFree(idxb);CHKERRQ(ierr); 2406 } 2407 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2408 PetscFunctionReturn(0); 2409 } 2410 2411 #undef __FUNCT__ 2412 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2413 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2414 { 2415 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2416 PetscErrorCode ierr; 2417 PetscInt i,*idxb = 0; 2418 PetscScalar *va,*vb; 2419 Vec vtmp; 2420 2421 PetscFunctionBegin; 2422 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2423 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2424 if (idx) { 2425 for (i=0; i<A->rmap->n; i++) { 2426 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2427 } 2428 } 2429 2430 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2431 if (idx) { 2432 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2433 } 2434 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2435 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2436 2437 for (i=0; i<A->rmap->n; i++){ 2438 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2439 va[i] = vb[i]; 2440 if (idx) idx[i] = a->garray[idxb[i]]; 2441 } 2442 } 2443 2444 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2445 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2446 if (idxb) { 2447 ierr = PetscFree(idxb);CHKERRQ(ierr); 2448 } 2449 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2450 PetscFunctionReturn(0); 2451 } 2452 2453 #undef __FUNCT__ 2454 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2455 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2456 { 2457 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2458 PetscInt n = A->rmap->n; 2459 PetscInt cstart = A->cmap->rstart; 2460 PetscInt *cmap = mat->garray; 2461 PetscInt *diagIdx, *offdiagIdx; 2462 Vec diagV, offdiagV; 2463 PetscScalar *a, *diagA, *offdiagA; 2464 PetscInt r; 2465 PetscErrorCode ierr; 2466 2467 PetscFunctionBegin; 2468 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2469 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2470 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2471 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2472 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2473
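/* merge the per-block results row by row: keep the diagonal-block candidate when its
     magnitude does not exceed the off-diagonal one, translating local column indices to
     global ones (cstart offset for the diagonal block, garray[] lookup for the off-diagonal block) */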
ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2474 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2475 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2476 for(r = 0; r < n; ++r) { 2477 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2478 a[r] = diagA[r]; 2479 idx[r] = cstart + diagIdx[r]; 2480 } else { 2481 a[r] = offdiagA[r]; 2482 idx[r] = cmap[offdiagIdx[r]]; 2483 } 2484 } 2485 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2486 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2487 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2488 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2489 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2490 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2491 PetscFunctionReturn(0); 2492 } 2493 2494 #undef __FUNCT__ 2495 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2496 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2497 { 2498 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2499 PetscInt n = A->rmap->n; 2500 PetscInt cstart = A->cmap->rstart; 2501 PetscInt *cmap = mat->garray; 2502 PetscInt *diagIdx, *offdiagIdx; 2503 Vec diagV, offdiagV; 2504 PetscScalar *a, *diagA, *offdiagA; 2505 PetscInt r; 2506 PetscErrorCode ierr; 2507 2508 PetscFunctionBegin; 2509 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2510 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2511 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2512 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2513 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2514 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2515 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2516 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2517 for(r = 0; r < n; ++r) { 2518 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2519 a[r] = diagA[r]; 2520 idx[r] = cstart + diagIdx[r]; 2521 } else { 2522 a[r] = offdiagA[r]; 2523 idx[r] = cmap[offdiagIdx[r]]; 2524 } 2525 } 2526 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2527 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2528 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2529 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2530 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2531 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2532 PetscFunctionReturn(0); 2533 } 2534 2535 #undef __FUNCT__ 2536 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2537 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[]) 2538 { 2539 PetscErrorCode ierr; 2540 2541 PetscFunctionBegin; 2542 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 2543 PetscFunctionReturn(0); 2544 } 2545 2546 /* -------------------------------------------------------------------*/ 2547 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2548 MatGetRow_MPIAIJ, 2549 MatRestoreRow_MPIAIJ, 2550 MatMult_MPIAIJ, 2551 /* 4*/ MatMultAdd_MPIAIJ, 2552 MatMultTranspose_MPIAIJ, 2553 MatMultTransposeAdd_MPIAIJ, 2554 #ifdef PETSC_HAVE_PBGL 2555 MatSolve_MPIAIJ, 2556 #else 2557 0, 2558 #endif 2559 0, 2560 0, 2561 /*10*/ 0, 2562 0, 2563 0, 2564 MatRelax_MPIAIJ, 2565 MatTranspose_MPIAIJ, 2566 /*15*/ MatGetInfo_MPIAIJ, 2567 MatEqual_MPIAIJ, 2568 MatGetDiagonal_MPIAIJ, 2569 MatDiagonalScale_MPIAIJ, 2570 MatNorm_MPIAIJ, 2571 /*20*/ MatAssemblyBegin_MPIAIJ, 2572 MatAssemblyEnd_MPIAIJ, 2573 MatSetOption_MPIAIJ, 2574 MatZeroEntries_MPIAIJ, 2575 /*24*/ MatZeroRows_MPIAIJ, 2576 0, 2577 #ifdef PETSC_HAVE_PBGL 2578 0, 
2579 #else 2580 0, 2581 #endif 2582 0, 2583 0, 2584 /*29*/ MatSetUpPreallocation_MPIAIJ, 2585 #ifdef PETSC_HAVE_PBGL 2586 0, 2587 #else 2588 0, 2589 #endif 2590 0, 2591 0, 2592 0, 2593 /*34*/ MatDuplicate_MPIAIJ, 2594 0, 2595 0, 2596 0, 2597 0, 2598 /*39*/ MatAXPY_MPIAIJ, 2599 MatGetSubMatrices_MPIAIJ, 2600 MatIncreaseOverlap_MPIAIJ, 2601 MatGetValues_MPIAIJ, 2602 MatCopy_MPIAIJ, 2603 /*44*/ MatGetRowMax_MPIAIJ, 2604 MatScale_MPIAIJ, 2605 0, 2606 0, 2607 0, 2608 /*49*/ MatSetBlockSize_MPIAIJ, 2609 0, 2610 0, 2611 0, 2612 0, 2613 /*54*/ MatFDColoringCreate_MPIAIJ, 2614 0, 2615 MatSetUnfactored_MPIAIJ, 2616 MatPermute_MPIAIJ, 2617 0, 2618 /*59*/ MatGetSubMatrix_MPIAIJ, 2619 MatDestroy_MPIAIJ, 2620 MatView_MPIAIJ, 2621 0, 2622 0, 2623 /*64*/ 0, 2624 0, 2625 0, 2626 0, 2627 0, 2628 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2629 MatGetRowMinAbs_MPIAIJ, 2630 0, 2631 MatSetColoring_MPIAIJ, 2632 #if defined(PETSC_HAVE_ADIC) 2633 MatSetValuesAdic_MPIAIJ, 2634 #else 2635 0, 2636 #endif 2637 MatSetValuesAdifor_MPIAIJ, 2638 /*75*/ 0, 2639 0, 2640 0, 2641 0, 2642 0, 2643 /*80*/ 0, 2644 0, 2645 0, 2646 /*83*/ MatLoad_MPIAIJ, 2647 0, 2648 0, 2649 0, 2650 0, 2651 0, 2652 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2653 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2654 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2655 MatPtAP_Basic, 2656 MatPtAPSymbolic_MPIAIJ, 2657 /*94*/ MatPtAPNumeric_MPIAIJ, 2658 0, 2659 0, 2660 0, 2661 0, 2662 /*99*/ 0, 2663 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2664 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2665 MatConjugate_MPIAIJ, 2666 0, 2667 /*104*/MatSetValuesRow_MPIAIJ, 2668 MatRealPart_MPIAIJ, 2669 MatImaginaryPart_MPIAIJ, 2670 0, 2671 0, 2672 /*109*/0, 2673 MatGetRedundantMatrix_MPIAIJ, 2674 MatGetRowMin_MPIAIJ, 2675 0, 2676 0, 2677 /*114*/MatGetSeqNonzerostructure_MPIAIJ}; 2678 2679 /* ----------------------------------------------------------------------------------------*/ 2680 2681 EXTERN_C_BEGIN 2682 #undef __FUNCT__ 2683 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2684 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2685 { 2686 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2687 PetscErrorCode ierr; 2688 2689 PetscFunctionBegin; 2690 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2691 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2692 PetscFunctionReturn(0); 2693 } 2694 EXTERN_C_END 2695 2696 EXTERN_C_BEGIN 2697 #undef __FUNCT__ 2698 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2699 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2700 { 2701 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2702 PetscErrorCode ierr; 2703 2704 PetscFunctionBegin; 2705 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2706 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2707 PetscFunctionReturn(0); 2708 } 2709 EXTERN_C_END 2710 2711 #include "petscpc.h" 2712 EXTERN_C_BEGIN 2713 #undef __FUNCT__ 2714 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2715 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2716 { 2717 Mat_MPIAIJ *b; 2718 PetscErrorCode ierr; 2719 PetscInt i; 2720 2721 PetscFunctionBegin; 2722 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2723 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2724 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2725 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2726 2727 ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr); 2728 ierr = 
PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr); 2729 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 2730 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 2731 if (d_nnz) { 2732 for (i=0; i<B->rmap->n; i++) { 2733 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2734 } 2735 } 2736 if (o_nnz) { 2737 for (i=0; i<B->rmap->n; i++) { 2738 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2739 } 2740 } 2741 b = (Mat_MPIAIJ*)B->data; 2742 2743 if (!B->preallocated) { 2744 /* Explicitly create 2 MATSEQAIJ matrices. */ 2745 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2746 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2747 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2748 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2749 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2750 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2751 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2752 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2753 } 2754 2755 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2756 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2757 B->preallocated = PETSC_TRUE; 2758 PetscFunctionReturn(0); 2759 } 2760 EXTERN_C_END 2761 2762 #undef __FUNCT__ 2763 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2764 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2765 { 2766 Mat mat; 2767 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2768 PetscErrorCode ierr; 2769 2770 PetscFunctionBegin; 2771 *newmat = 0; 2772 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2773 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2774 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2775 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2776 a = (Mat_MPIAIJ*)mat->data; 2777 2778 mat->factor = matin->factor; 2779 mat->rmap->bs = matin->rmap->bs; 2780 mat->assembled = PETSC_TRUE; 2781 mat->insertmode = NOT_SET_VALUES; 2782 mat->preallocated = PETSC_TRUE; 2783 2784 a->size = oldmat->size; 2785 a->rank = oldmat->rank; 2786 a->donotstash = oldmat->donotstash; 2787 a->roworiented = oldmat->roworiented; 2788 a->rowindices = 0; 2789 a->rowvalues = 0; 2790 a->getrowactive = PETSC_FALSE; 2791 2792 ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr); 2793 ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr); 2794 2795 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr); 2796 if (oldmat->colmap) { 2797 #if defined (PETSC_USE_CTABLE) 2798 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2799 #else 2800 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2801 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2802 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2803 #endif 2804 } else a->colmap = 0; 2805 if (oldmat->garray) { 2806 PetscInt len; 2807 len = oldmat->B->cmap->n; 2808 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2809 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2810 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 
} 2811 } else a->garray = 0; 2812 2813 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2814 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2815 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2816 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2817 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2818 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2819 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2820 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2821 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2822 *newmat = mat; 2823 PetscFunctionReturn(0); 2824 } 2825 2826 #include "petscsys.h" 2827 2828 #undef __FUNCT__ 2829 #define __FUNCT__ "MatLoad_MPIAIJ" 2830 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 2831 { 2832 Mat A; 2833 PetscScalar *vals,*svals; 2834 MPI_Comm comm = ((PetscObject)viewer)->comm; 2835 MPI_Status status; 2836 PetscErrorCode ierr; 2837 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz; 2838 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2839 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2840 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2841 PetscInt cend,cstart,n,*rowners; 2842 int fd; 2843 2844 PetscFunctionBegin; 2845 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2846 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2847 if (!rank) { 2848 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2849 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2850 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2851 } 2852 2853 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2854 M = header[1]; N = header[2]; 2855 /* determine ownership of all rows */ 2856 m = M/size + ((M % size) > rank); 2857 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2858 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2859 2860 /* First process needs enough room for process with most rows */ 2861 if (!rank) { 2862 mmax = rowners[1]; 2863 for (i=2; i<size; i++) { 2864 mmax = PetscMax(mmax,rowners[i]); 2865 } 2866 } else mmax = m; 2867 2868 rowners[0] = 0; 2869 for (i=2; i<=size; i++) { 2870 rowners[i] += rowners[i-1]; 2871 } 2872 rstart = rowners[rank]; 2873 rend = rowners[rank+1]; 2874 2875 /* distribute row lengths to all processors */ 2876 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2877 if (!rank) { 2878 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2879 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2880 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2881 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2882 for (j=0; j<m; j++) { 2883 procsnz[0] += ourlens[j]; 2884 } 2885 for (i=1; i<size; i++) { 2886 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2887 /* calculate the number of nonzeros on each processor */ 2888 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2889 procsnz[i] += rowlengths[j]; 2890 } 2891 mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]); 2892 ierr = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2893 } 2894 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2895 } else { 2896 mpicnt = PetscMPIIntCast(m);CHKERRQ(ierr); 2897 ierr = 
MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2898 } 2899 2900 if (!rank) { 2901 /* determine max buffer needed and allocate it */ 2902 maxnz = 0; 2903 for (i=0; i<size; i++) { 2904 maxnz = PetscMax(maxnz,procsnz[i]); 2905 } 2906 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2907 2908 /* read in my part of the matrix column indices */ 2909 nz = procsnz[0]; 2910 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2911 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2912 2913 /* read in every one elses and ship off */ 2914 for (i=1; i<size; i++) { 2915 nz = procsnz[i]; 2916 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2917 mpicnt = PetscMPIIntCast(nz); 2918 ierr = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2919 } 2920 ierr = PetscFree(cols);CHKERRQ(ierr); 2921 } else { 2922 /* determine buffer space needed for message */ 2923 nz = 0; 2924 for (i=0; i<m; i++) { 2925 nz += ourlens[i]; 2926 } 2927 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2928 2929 /* receive message of column indices*/ 2930 mpicnt = PetscMPIIntCast(nz);CHKERRQ(ierr); 2931 ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2932 ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr); 2933 if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 2934 else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);} 2935 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 2936 } 2937 2938 /* determine column ownership if matrix is not square */ 2939 if (N != M) { 2940 n = N/size + ((N % size) > rank); 2941 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2942 cstart = cend - n; 2943 } else { 2944 cstart = rstart; 2945 cend = rend; 2946 n = cend - cstart; 2947 } 2948 2949 /* loop over local rows, determining number of off diagonal entries */ 2950 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2951 jj = 0; 2952 for (i=0; i<m; i++) { 2953 for (j=0; j<ourlens[i]; j++) { 2954 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2955 jj++; 2956 } 2957 } 2958 2959 /* create our matrix */ 2960 for (i=0; i<m; i++) { 2961 ourlens[i] -= offlens[i]; 2962 } 2963 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2964 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2965 ierr = MatSetType(A,type);CHKERRQ(ierr); 2966 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2967 2968 for (i=0; i<m; i++) { 2969 ourlens[i] += offlens[i]; 2970 } 2971 2972 if (!rank) { 2973 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2974 2975 /* read in my part of the matrix numerical values */ 2976 nz = procsnz[0]; 2977 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2978 2979 /* insert into matrix */ 2980 jj = rstart; 2981 smycols = mycols; 2982 svals = vals; 2983 for (i=0; i<m; i++) { 2984 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2985 smycols += ourlens[i]; 2986 svals += ourlens[i]; 2987 jj++; 2988 } 2989 2990 /* read in other processors and ship out */ 2991 for (i=1; i<size; i++) { 2992 nz = procsnz[i]; 2993 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2994 mpicnt = PetscMPIIntCast(nz); 2995 ierr = 
MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 2996 } 2997 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2998 } else { 2999 /* receive numeric values */ 3000 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3001 3002 /* receive message of values*/ 3003 mpicnt = PetscMPIIntCast(nz); 3004 ierr = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 3005 ierr = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr); 3006 if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 3007 else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);} 3008 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 3009 3010 /* insert into matrix */ 3011 jj = rstart; 3012 smycols = mycols; 3013 svals = vals; 3014 for (i=0; i<m; i++) { 3015 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3016 smycols += ourlens[i]; 3017 svals += ourlens[i]; 3018 jj++; 3019 } 3020 } 3021 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3022 ierr = PetscFree(vals);CHKERRQ(ierr); 3023 ierr = PetscFree(mycols);CHKERRQ(ierr); 3024 ierr = PetscFree(rowners);CHKERRQ(ierr); 3025 3026 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3027 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3028 *newmat = A; 3029 PetscFunctionReturn(0); 3030 } 3031 3032 #undef __FUNCT__ 3033 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3034 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3035 { 3036 PetscErrorCode ierr; 3037 IS iscol_local; 3038 PetscInt csize; 3039 3040 PetscFunctionBegin; 3041 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3042 if (call == MAT_REUSE_MATRIX) { 3043 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3044 if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3045 } else { 3046 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3047 } 3048 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3049 if (call == MAT_INITIAL_MATRIX) { 3050 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3051 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 3052 } 3053 PetscFunctionReturn(0); 3054 } 3055 3056 #undef __FUNCT__ 3057 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3058 /* 3059 Not great since it makes two copies of the submatrix, first an SeqAIJ 3060 in local and then by concatenating the local matrices the end result. 3061 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3062 3063 Note: This requires a sequential iscol with all indices. 
3064 */ 3065 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3066 { 3067 PetscErrorCode ierr; 3068 PetscMPIInt rank,size; 3069 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3070 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3071 Mat *local,M,Mreuse; 3072 MatScalar *vwork,*aa; 3073 MPI_Comm comm = ((PetscObject)mat)->comm; 3074 Mat_SeqAIJ *aij; 3075 3076 3077 PetscFunctionBegin; 3078 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3079 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3080 3081 if (call == MAT_REUSE_MATRIX) { 3082 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3083 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3084 local = &Mreuse; 3085 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3086 } else { 3087 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3088 Mreuse = *local; 3089 ierr = PetscFree(local);CHKERRQ(ierr); 3090 } 3091 3092 /* 3093 m - number of local rows 3094 n - number of columns (same on all processors) 3095 rstart - first row in new global matrix generated 3096 */ 3097 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3098 if (call == MAT_INITIAL_MATRIX) { 3099 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3100 ii = aij->i; 3101 jj = aij->j; 3102 3103 /* 3104 Determine the number of non-zeros in the diagonal and off-diagonal 3105 portions of the matrix in order to do correct preallocation 3106 */ 3107 3108 /* first get start and end of "diagonal" columns */ 3109 if (csize == PETSC_DECIDE) { 3110 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3111 if (mglobal == n) { /* square matrix */ 3112 nlocal = m; 3113 } else { 3114 nlocal = n/size + ((n % size) > rank); 3115 } 3116 } else { 3117 nlocal = csize; 3118 } 3119 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3120 rstart = rend - nlocal; 3121 if (rank == size - 1 && rend != n) { 3122 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3123 } 3124 3125 /* next, compute all the lengths */ 3126 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3127 olens = dlens + m; 3128 for (i=0; i<m; i++) { 3129 jend = ii[i+1] - ii[i]; 3130 olen = 0; 3131 dlen = 0; 3132 for (j=0; j<jend; j++) { 3133 if (*jj < rstart || *jj >= rend) olen++; 3134 else dlen++; 3135 jj++; 3136 } 3137 olens[i] = olen; 3138 dlens[i] = dlen; 3139 } 3140 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3141 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3142 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3143 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3144 ierr = PetscFree(dlens);CHKERRQ(ierr); 3145 } else { 3146 PetscInt ml,nl; 3147 3148 M = *newmat; 3149 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3150 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3151 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3152 /* 3153 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3154 rather than the slower MatSetValues(). 
3155 */ 3156 M->was_assembled = PETSC_TRUE; 3157 M->assembled = PETSC_FALSE; 3158 } 3159 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3160 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3161 ii = aij->i; 3162 jj = aij->j; 3163 aa = aij->a; 3164 for (i=0; i<m; i++) { 3165 row = rstart + i; 3166 nz = ii[i+1] - ii[i]; 3167 cwork = jj; jj += nz; 3168 vwork = aa; aa += nz; 3169 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3170 } 3171 3172 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3173 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3174 *newmat = M; 3175 3176 /* save submatrix used in processor for next request */ 3177 if (call == MAT_INITIAL_MATRIX) { 3178 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3179 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 3180 } 3181 3182 PetscFunctionReturn(0); 3183 } 3184 3185 EXTERN_C_BEGIN 3186 #undef __FUNCT__ 3187 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3188 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3189 { 3190 PetscInt m,cstart, cend,j,nnz,i,d; 3191 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3192 const PetscInt *JJ; 3193 PetscScalar *values; 3194 PetscErrorCode ierr; 3195 3196 PetscFunctionBegin; 3197 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3198 3199 ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3200 ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3201 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 3202 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 3203 m = B->rmap->n; 3204 cstart = B->cmap->rstart; 3205 cend = B->cmap->rend; 3206 rstart = B->rmap->rstart; 3207 3208 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 3209 o_nnz = d_nnz + m; 3210 3211 #if defined(PETSC_USE_DEBUGGING) 3212 for (i=0; i<m; i++) { 3213 nnz = Ii[i+1]- Ii[i]; 3214 JJ = J + Ii[i]; 3215 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3216 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3217 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3218 for (j=1; j<nnz; j++) { 3219 if (JJ[j] <= JJ[j-1]) SETERRQ2(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at %D location in column indices",i,j); 3220 } 3221 } 3222 #endif 3223 3224 for (i=0; i<m; i++) { 3225 nnz = Ii[i+1]- Ii[i]; 3226 JJ = J + Ii[i]; 3227 nnz_max = PetscMax(nnz_max,nnz); 3228 for (j=0; j<nnz; j++) { 3229 if (*JJ >= cstart) break; 3230 JJ++; 3231 } 3232 d = 0; 3233 for (; j<nnz; j++) { 3234 if (*JJ++ >= cend) break; 3235 d++; 3236 } 3237 d_nnz[i] = d; 3238 o_nnz[i] = nnz - d; 3239 } 3240 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3241 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 3242 3243 if (v) values = (PetscScalar*)v; 3244 else { 3245 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3246 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3247 } 3248 3249 for (i=0; i<m; i++) { 3250 ii = i + rstart; 3251 nnz = Ii[i+1]- Ii[i]; 3252 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3253 } 3254 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3255 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3256 3257 if (!v) { 3258 ierr = PetscFree(values);CHKERRQ(ierr); 3259 } 3260 PetscFunctionReturn(0); 3261 } 3262 EXTERN_C_END 3263 3264 #undef __FUNCT__ 3265 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3266 /*@ 3267 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3268 (the default parallel PETSc format). 3269 3270 Collective on MPI_Comm 3271 3272 Input Parameters: 3273 + B - the matrix 3274 . i - the indices into j for the start of each local row (starts with zero) 3275 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3276 - v - optional values in the matrix 3277 3278 Level: developer 3279 3280 Notes: 3281 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3282 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3283 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3284 3285 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3286 3287 The format which is used for the sparse matrix input, is equivalent to a 3288 row-major ordering.. i.e for the following matrix, the input data expected is 3289 as shown: 3290 3291 1 0 0 3292 2 0 3 P0 3293 ------- 3294 4 5 6 P1 3295 3296 Process0 [P0]: rows_owned=[0,1] 3297 i = {0,1,3} [size = nrow+1 = 2+1] 3298 j = {0,0,2} [size = nz = 6] 3299 v = {1,2,3} [size = nz = 6] 3300 3301 Process1 [P1]: rows_owned=[2] 3302 i = {0,3} [size = nrow+1 = 1+1] 3303 j = {0,1,2} [size = nz = 6] 3304 v = {4,5,6} [size = nz = 6] 3305 3306 The column indices for each row MUST be sorted. 3307 3308 .keywords: matrix, aij, compressed row, sparse, parallel 3309 3310 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3311 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3312 @*/ 3313 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3314 { 3315 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 3316 3317 PetscFunctionBegin; 3318 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3319 if (f) { 3320 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 3321 } 3322 PetscFunctionReturn(0); 3323 } 3324 3325 #undef __FUNCT__ 3326 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3327 /*@C 3328 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3329 (the default parallel PETSc format). For good matrix assembly performance 3330 the user should preallocate the matrix storage by setting the parameters 3331 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3332 performance can be increased by more than a factor of 50. 3333 3334 Collective on MPI_Comm 3335 3336 Input Parameters: 3337 + A - the matrix 3338 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3339 (same value is used for all local rows) 3340 . d_nnz - array containing the number of nonzeros in the various rows of the 3341 DIAGONAL portion of the local submatrix (possibly different for each row) 3342 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 
3343 The size of this array is equal to the number of local rows, i.e 'm'. 3344 You must leave room for the diagonal entry even if it is zero. 3345 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3346 submatrix (same value is used for all local rows). 3347 - o_nnz - array containing the number of nonzeros in the various rows of the 3348 OFF-DIAGONAL portion of the local submatrix (possibly different for 3349 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3350 structure. The size of this array is equal to the number 3351 of local rows, i.e 'm'. 3352 3353 If the *_nnz parameter is given then the *_nz parameter is ignored 3354 3355 The AIJ format (also called the Yale sparse matrix format or 3356 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3357 storage. The stored row and column indices begin with zero. See the users manual for details. 3358 3359 The parallel matrix is partitioned such that the first m0 rows belong to 3360 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3361 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3362 3363 The DIAGONAL portion of the local submatrix of a processor can be defined 3364 as the submatrix which is obtained by extraction the part corresponding 3365 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 3366 first row that belongs to the processor, and r2 is the last row belonging 3367 to the this processor. This is a square mxm matrix. The remaining portion 3368 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 3369 3370 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3371 3372 You can call MatGetInfo() to get information on how effective the preallocation was; 3373 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3374 You can also run with the option -info and look for messages with the string 3375 malloc in them to see if additional memory allocation was needed. 3376 3377 Example usage: 3378 3379 Consider the following 8x8 matrix with 34 non-zero values, that is 3380 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3381 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3382 as follows: 3383 3384 .vb 3385 1 2 0 | 0 3 0 | 0 4 3386 Proc0 0 5 6 | 7 0 0 | 8 0 3387 9 0 10 | 11 0 0 | 12 0 3388 ------------------------------------- 3389 13 0 14 | 15 16 17 | 0 0 3390 Proc1 0 18 0 | 19 20 21 | 0 0 3391 0 0 0 | 22 23 0 | 24 0 3392 ------------------------------------- 3393 Proc2 25 26 27 | 0 0 28 | 29 0 3394 30 0 0 | 31 32 33 | 0 34 3395 .ve 3396 3397 This can be represented as a collection of submatrices as: 3398 3399 .vb 3400 A B C 3401 D E F 3402 G H I 3403 .ve 3404 3405 Where the submatrices A,B,C are owned by proc0, D,E,F are 3406 owned by proc1, G,H,I are owned by proc2. 3407 3408 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3409 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3410 The 'M','N' parameters are 8,8, and have the same values on all procs. 3411 3412 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3413 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3414 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3415 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3416 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3417 matrix, ans [DF] as another SeqAIJ matrix. 
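   As a minimal sketch (the array names d_nnz and o_nnz are illustrative), each process in the
   example above would preallocate its rows with a call sequence of the form
.vb
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,8,8);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve
   where m,n are the local sizes (3 on proc0 and proc1, 2 on proc2) and d_nnz,o_nnz hold the
   per-row counts discussed below.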
3418 3419 When d_nz, o_nz parameters are specified, d_nz storage elements are 3420 allocated for every row of the local diagonal submatrix, and o_nz 3421 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3422 One way to choose d_nz and o_nz is to use the max nonzerors per local 3423 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3424 In this case, the values of d_nz,o_nz are: 3425 .vb 3426 proc0 : dnz = 2, o_nz = 2 3427 proc1 : dnz = 3, o_nz = 2 3428 proc2 : dnz = 1, o_nz = 4 3429 .ve 3430 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3431 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3432 for proc3. i.e we are using 12+15+10=37 storage locations to store 3433 34 values. 3434 3435 When d_nnz, o_nnz parameters are specified, the storage is specified 3436 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3437 In the above case the values for d_nnz,o_nnz are: 3438 .vb 3439 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3440 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3441 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3442 .ve 3443 Here the space allocated is sum of all the above values i.e 34, and 3444 hence pre-allocation is perfect. 3445 3446 Level: intermediate 3447 3448 .keywords: matrix, aij, compressed row, sparse, parallel 3449 3450 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3451 MPIAIJ, MatGetInfo() 3452 @*/ 3453 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3454 { 3455 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3456 3457 PetscFunctionBegin; 3458 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3459 if (f) { 3460 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3461 } 3462 PetscFunctionReturn(0); 3463 } 3464 3465 #undef __FUNCT__ 3466 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3467 /*@ 3468 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3469 CSR format the local rows. 3470 3471 Collective on MPI_Comm 3472 3473 Input Parameters: 3474 + comm - MPI communicator 3475 . m - number of local rows (Cannot be PETSC_DECIDE) 3476 . n - This value should be the same as the local size used in creating the 3477 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3478 calculated if N is given) For square matrices n is almost always m. 3479 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3480 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3481 . i - row indices 3482 . j - column indices 3483 - a - matrix values 3484 3485 Output Parameter: 3486 . mat - the matrix 3487 3488 Level: intermediate 3489 3490 Notes: 3491 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3492 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3493 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3494 3495 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3496 3497 The format which is used for the sparse matrix input, is equivalent to a 3498 row-major ordering.. 
i.e for the following matrix, the input data expected is 3499 as shown: 3500 3501 1 0 0 3502 2 0 3 P0 3503 ------- 3504 4 5 6 P1 3505 3506 Process0 [P0]: rows_owned=[0,1] 3507 i = {0,1,3} [size = nrow+1 = 2+1] 3508 j = {0,0,2} [size = nz = 6] 3509 v = {1,2,3} [size = nz = 6] 3510 3511 Process1 [P1]: rows_owned=[2] 3512 i = {0,3} [size = nrow+1 = 1+1] 3513 j = {0,1,2} [size = nz = 6] 3514 v = {4,5,6} [size = nz = 6] 3515 3516 .keywords: matrix, aij, compressed row, sparse, parallel 3517 3518 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3519 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3520 @*/ 3521 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3522 { 3523 PetscErrorCode ierr; 3524 3525 PetscFunctionBegin; 3526 if (i[0]) { 3527 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3528 } 3529 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3530 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3531 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3532 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3533 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3534 PetscFunctionReturn(0); 3535 } 3536 3537 #undef __FUNCT__ 3538 #define __FUNCT__ "MatCreateMPIAIJ" 3539 /*@C 3540 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3541 (the default parallel PETSc format). For good matrix assembly performance 3542 the user should preallocate the matrix storage by setting the parameters 3543 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3544 performance can be increased by more than a factor of 50. 3545 3546 Collective on MPI_Comm 3547 3548 Input Parameters: 3549 + comm - MPI communicator 3550 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3551 This value should be the same as the local size used in creating the 3552 y vector for the matrix-vector product y = Ax. 3553 . n - This value should be the same as the local size used in creating the 3554 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3555 calculated if N is given) For square matrices n is almost always m. 3556 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3557 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3558 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3559 (same value is used for all local rows) 3560 . d_nnz - array containing the number of nonzeros in the various rows of the 3561 DIAGONAL portion of the local submatrix (possibly different for each row) 3562 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3563 The size of this array is equal to the number of local rows, i.e 'm'. 3564 You must leave room for the diagonal entry even if it is zero. 3565 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3566 submatrix (same value is used for all local rows). 3567 - o_nnz - array containing the number of nonzeros in the various rows of the 3568 OFF-DIAGONAL portion of the local submatrix (possibly different for 3569 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3570 structure. The size of this array is equal to the number 3571 of local rows, i.e 'm'. 
3572 3573 Output Parameter: 3574 . A - the matrix 3575 3576 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3577 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3578 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3579 3580 Notes: 3581 If the *_nnz parameter is given then the *_nz parameter is ignored 3582 3583 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3584 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3585 storage requirements for this matrix. 3586 3587 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3588 processor than it must be used on all processors that share the object for 3589 that argument. 3590 3591 The user MUST specify either the local or global matrix dimensions 3592 (possibly both). 3593 3594 The parallel matrix is partitioned across processors such that the 3595 first m0 rows belong to process 0, the next m1 rows belong to 3596 process 1, the next m2 rows belong to process 2 etc.. where 3597 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3598 values corresponding to [m x N] submatrix. 3599 3600 The columns are logically partitioned with the n0 columns belonging 3601 to 0th partition, the next n1 columns belonging to the next 3602 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 3603 3604 The DIAGONAL portion of the local submatrix on any given processor 3605 is the submatrix corresponding to the rows and columns m,n 3606 corresponding to the given processor. i.e diagonal matrix on 3607 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3608 etc. The remaining portion of the local submatrix [m x (N-n)] 3609 constitute the OFF-DIAGONAL portion. The example below better 3610 illustrates this concept. 3611 3612 For a square global matrix we define each processor's diagonal portion 3613 to be its local rows and the corresponding columns (a square submatrix); 3614 each processor's off-diagonal portion encompasses the remainder of the 3615 local matrix (a rectangular submatrix). 3616 3617 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3618 3619 When calling this routine with a single process communicator, a matrix of 3620 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3621 type of communicator, use the construction mechanism: 3622 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3623 3624 By default, this format uses inodes (identical nodes) when possible. 3625 We search for consecutive rows with the same nonzero structure, thereby 3626 reusing matrix information to achieve increased efficiency. 3627 3628 Options Database Keys: 3629 + -mat_no_inode - Do not use inodes 3630 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3631 - -mat_aij_oneindex - Internally use indexing starting at 1 3632 rather than 0. Note that when calling MatSetValues(), 3633 the user still MUST index entries starting at 0! 3634 3635 3636 Example usage: 3637 3638 Consider the following 8x8 matrix with 34 non-zero values, that is 3639 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3640 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 3641 as follows: 3642 3643 .vb 3644 1 2 0 | 0 3 0 | 0 4 3645 Proc0 0 5 6 | 7 0 0 | 8 0 3646 9 0 10 | 11 0 0 | 12 0 3647 ------------------------------------- 3648 13 0 14 | 15 16 17 | 0 0 3649 Proc1 0 18 0 | 19 20 21 | 0 0 3650 0 0 0 | 22 23 0 | 24 0 3651 ------------------------------------- 3652 Proc2 25 26 27 | 0 0 28 | 29 0 3653 30 0 0 | 31 32 33 | 0 34 3654 .ve 3655 3656 This can be represented as a collection of submatrices as: 3657 3658 .vb 3659 A B C 3660 D E F 3661 G H I 3662 .ve 3663 3664 Where the submatrices A,B,C are owned by proc0, D,E,F are 3665 owned by proc1, G,H,I are owned by proc2. 3666 3667 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3668 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3669 The 'M','N' parameters are 8,8, and have the same values on all procs. 3670 3671 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3672 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3673 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3674 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3675 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3676 matrix, ans [DF] as another SeqAIJ matrix. 3677 3678 When d_nz, o_nz parameters are specified, d_nz storage elements are 3679 allocated for every row of the local diagonal submatrix, and o_nz 3680 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3681 One way to choose d_nz and o_nz is to use the max nonzerors per local 3682 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3683 In this case, the values of d_nz,o_nz are: 3684 .vb 3685 proc0 : dnz = 2, o_nz = 2 3686 proc1 : dnz = 3, o_nz = 2 3687 proc2 : dnz = 1, o_nz = 4 3688 .ve 3689 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3690 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3691 for proc3. i.e we are using 12+15+10=37 storage locations to store 3692 34 values. 3693 3694 When d_nnz, o_nnz parameters are specified, the storage is specified 3695 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3696 In the above case the values for d_nnz,o_nnz are: 3697 .vb 3698 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3699 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3700 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3701 .ve 3702 Here the space allocated is sum of all the above values i.e 34, and 3703 hence pre-allocation is perfect. 
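   A minimal sketch of the corresponding call on proc0 of this example (variable names are
   illustrative and most error checking is omitted):
.vb
     PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
     ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
     ... insert this process's rows with MatSetValues() ...
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve
   The other processes make the same call with their own m,n and d_nnz,o_nnz values.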
3704 3705 Level: intermediate 3706 3707 .keywords: matrix, aij, compressed row, sparse, parallel 3708 3709 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3710 MPIAIJ, MatCreateMPIAIJWithArrays() 3711 @*/ 3712 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3713 { 3714 PetscErrorCode ierr; 3715 PetscMPIInt size; 3716 3717 PetscFunctionBegin; 3718 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3719 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3720 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3721 if (size > 1) { 3722 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3723 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3724 } else { 3725 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3726 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3727 } 3728 PetscFunctionReturn(0); 3729 } 3730 3731 #undef __FUNCT__ 3732 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3733 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3734 { 3735 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3736 3737 PetscFunctionBegin; 3738 *Ad = a->A; 3739 *Ao = a->B; 3740 *colmap = a->garray; 3741 PetscFunctionReturn(0); 3742 } 3743 3744 #undef __FUNCT__ 3745 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3746 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3747 { 3748 PetscErrorCode ierr; 3749 PetscInt i; 3750 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3751 3752 PetscFunctionBegin; 3753 if (coloring->ctype == IS_COLORING_GLOBAL) { 3754 ISColoringValue *allcolors,*colors; 3755 ISColoring ocoloring; 3756 3757 /* set coloring for diagonal portion */ 3758 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3759 3760 /* set coloring for off-diagonal portion */ 3761 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3762 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3763 for (i=0; i<a->B->cmap->n; i++) { 3764 colors[i] = allcolors[a->garray[i]]; 3765 } 3766 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3767 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3768 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3769 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3770 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3771 ISColoringValue *colors; 3772 PetscInt *larray; 3773 ISColoring ocoloring; 3774 3775 /* set coloring for diagonal portion */ 3776 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3777 for (i=0; i<a->A->cmap->n; i++) { 3778 larray[i] = i + A->cmap->rstart; 3779 } 3780 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3781 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3782 for (i=0; i<a->A->cmap->n; i++) { 3783 colors[i] = coloring->colors[larray[i]]; 3784 } 3785 ierr = PetscFree(larray);CHKERRQ(ierr); 3786 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3787 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3788 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3789 3790 /* set coloring for off-diagonal portion */ 3791 ierr = 
PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3792 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3793 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3794 for (i=0; i<a->B->cmap->n; i++) { 3795 colors[i] = coloring->colors[larray[i]]; 3796 } 3797 ierr = PetscFree(larray);CHKERRQ(ierr); 3798 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3799 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3800 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3801 } else { 3802 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3803 } 3804 3805 PetscFunctionReturn(0); 3806 } 3807 3808 #if defined(PETSC_HAVE_ADIC) 3809 #undef __FUNCT__ 3810 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3811 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3812 { 3813 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3814 PetscErrorCode ierr; 3815 3816 PetscFunctionBegin; 3817 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3818 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3819 PetscFunctionReturn(0); 3820 } 3821 #endif 3822 3823 #undef __FUNCT__ 3824 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3825 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3826 { 3827 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3828 PetscErrorCode ierr; 3829 3830 PetscFunctionBegin; 3831 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3832 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3833 PetscFunctionReturn(0); 3834 } 3835 3836 #undef __FUNCT__ 3837 #define __FUNCT__ "MatMerge" 3838 /*@ 3839 MatMerge - Creates a single large PETSc matrix by concatenating sequential 3840 matrices from each processor 3841 3842 Collective on MPI_Comm 3843 3844 Input Parameters: 3845 + comm - the communicator the parallel matrix will live on 3846 . inmat - the input sequential matrix 3847 . n - number of local columns (or PETSC_DECIDE) 3848 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3849 3850 Output Parameter: 3851 . outmat - the parallel matrix generated 3852 3853 Level: advanced 3854 3855 Notes: The number of columns of the matrix in EACH processor MUST be the same.
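   Example usage (a sketch only; seqA denotes the sequential matrix each process has built locally):
.vb
     Mat seqA,parA;
     ... assemble seqA as a SeqAIJ matrix holding this process's rows ...
     ierr = MatMerge(comm,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&parA);CHKERRQ(ierr);
.ve
   The input matrix is destroyed inside this routine (note the MatDestroy(inmat) call in the body),
   so the caller must not use or destroy it afterwards.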
3856 3857 @*/ 3858 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3859 { 3860 PetscErrorCode ierr; 3861 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3862 PetscInt *indx; 3863 PetscScalar *values; 3864 3865 PetscFunctionBegin; 3866 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3867 if (scall == MAT_INITIAL_MATRIX){ 3868 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3869 if (n == PETSC_DECIDE){ 3870 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3871 } 3872 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3873 rstart -= m; 3874 3875 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3876 for (i=0;i<m;i++) { 3877 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3878 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3879 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3880 } 3881 /* This routine will ONLY return MPIAIJ type matrix */ 3882 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3883 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3884 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3885 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3886 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3887 3888 } else if (scall == MAT_REUSE_MATRIX){ 3889 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3890 } else { 3891 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3892 } 3893 3894 for (i=0;i<m;i++) { 3895 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3896 Ii = i + rstart; 3897 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3898 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3899 } 3900 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3901 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3902 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3903 3904 PetscFunctionReturn(0); 3905 } 3906 3907 #undef __FUNCT__ 3908 #define __FUNCT__ "MatFileSplit" 3909 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3910 { 3911 PetscErrorCode ierr; 3912 PetscMPIInt rank; 3913 PetscInt m,N,i,rstart,nnz; 3914 size_t len; 3915 const PetscInt *indx; 3916 PetscViewer out; 3917 char *name; 3918 Mat B; 3919 const PetscScalar *values; 3920 3921 PetscFunctionBegin; 3922 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3923 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3924 /* Should this be the type of the diagonal block of A? 
*/ 3925 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3926 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3927 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3928 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3929 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3930 for (i=0;i<m;i++) { 3931 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3932 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3933 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3934 } 3935 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3936 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3937 3938 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 3939 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3940 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3941 sprintf(name,"%s.%d",outfile,rank); 3942 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3943 ierr = PetscFree(name); 3944 ierr = MatView(B,out);CHKERRQ(ierr); 3945 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3946 ierr = MatDestroy(B);CHKERRQ(ierr); 3947 PetscFunctionReturn(0); 3948 } 3949 3950 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3951 #undef __FUNCT__ 3952 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3953 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3954 { 3955 PetscErrorCode ierr; 3956 Mat_Merge_SeqsToMPI *merge; 3957 PetscContainer container; 3958 3959 PetscFunctionBegin; 3960 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3961 if (container) { 3962 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3963 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3964 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3965 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3966 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3967 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3968 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3969 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3970 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3971 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3972 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3973 ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr); 3974 3975 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 3976 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3977 } 3978 ierr = PetscFree(merge);CHKERRQ(ierr); 3979 3980 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3981 PetscFunctionReturn(0); 3982 } 3983 3984 #include "../src/mat/utils/freespace.h" 3985 #include "petscbt.h" 3986 3987 #undef __FUNCT__ 3988 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 3989 /*@C 3990 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 3991 matrices from each processor 3992 3993 Collective on MPI_Comm 3994 3995 Input Parameters: 3996 + comm - the communicators the parallel matrix will live on 3997 . seqmat - the input sequential matrices 3998 . m - number of local rows (or PETSC_DECIDE) 3999 . n - number of local columns (or PETSC_DECIDE) 4000 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4001 4002 Output Parameter: 4003 . mpimat - the parallel matrix generated 4004 4005 Level: advanced 4006 4007 Notes: 4008 The dimensions of the sequential matrix in each processor MUST be the same. 4009 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4010 destroyed when mpimat is destroyed. 
Call PetscObjectQuery() to access seqmat. 4011 @*/ 4012 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4013 { 4014 PetscErrorCode ierr; 4015 MPI_Comm comm=((PetscObject)mpimat)->comm; 4016 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4017 PetscMPIInt size,rank,taga,*len_s; 4018 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4019 PetscInt proc,m; 4020 PetscInt **buf_ri,**buf_rj; 4021 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4022 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4023 MPI_Request *s_waits,*r_waits; 4024 MPI_Status *status; 4025 MatScalar *aa=a->a; 4026 MatScalar **abuf_r,*ba_i; 4027 Mat_Merge_SeqsToMPI *merge; 4028 PetscContainer container; 4029 4030 PetscFunctionBegin; 4031 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4032 4033 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4034 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4035 4036 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4037 if (container) { 4038 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4039 } 4040 bi = merge->bi; 4041 bj = merge->bj; 4042 buf_ri = merge->buf_ri; 4043 buf_rj = merge->buf_rj; 4044 4045 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4046 owners = merge->rowmap.range; 4047 len_s = merge->len_s; 4048 4049 /* send and recv matrix values */ 4050 /*-----------------------------*/ 4051 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4052 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4053 4054 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4055 for (proc=0,k=0; proc<size; proc++){ 4056 if (!len_s[proc]) continue; 4057 i = owners[proc]; 4058 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4059 k++; 4060 } 4061 4062 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4063 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4064 ierr = PetscFree(status);CHKERRQ(ierr); 4065 4066 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4067 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4068 4069 /* insert mat values of mpimat */ 4070 /*----------------------------*/ 4071 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4072 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 4073 nextrow = buf_ri_k + merge->nrecv; 4074 nextai = nextrow + merge->nrecv; 4075 4076 for (k=0; k<merge->nrecv; k++){ 4077 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4078 nrows = *(buf_ri_k[k]); 4079 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4080 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4081 } 4082 4083 /* set values of ba */ 4084 m = merge->rowmap.n; 4085 for (i=0; i<m; i++) { 4086 arow = owners[rank] + i; 4087 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4088 bnzi = bi[i+1] - bi[i]; 4089 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4090 4091 /* add local non-zero vals of this proc's seqmat into ba */ 4092 anzi = ai[arow+1] - ai[arow]; 4093 aj = a->j + ai[arow]; 4094 aa = a->a + ai[arow]; 4095 nextaj = 0; 4096 for (j=0; nextaj<anzi; j++){ 4097 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4098 ba_i[j] += aa[nextaj++]; 4099 } 4100 } 4101 4102 /* add 
received vals into ba */ 4103 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4104 /* i-th row */ 4105 if (i == *nextrow[k]) { 4106 anzi = *(nextai[k]+1) - *nextai[k]; 4107 aj = buf_rj[k] + *(nextai[k]); 4108 aa = abuf_r[k] + *(nextai[k]); 4109 nextaj = 0; 4110 for (j=0; nextaj<anzi; j++){ 4111 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4112 ba_i[j] += aa[nextaj++]; 4113 } 4114 } 4115 nextrow[k]++; nextai[k]++; 4116 } 4117 } 4118 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4119 } 4120 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4121 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4122 4123 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4124 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4125 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 4126 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4127 PetscFunctionReturn(0); 4128 } 4129 4130 #undef __FUNCT__ 4131 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4132 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4133 { 4134 PetscErrorCode ierr; 4135 Mat B_mpi; 4136 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4137 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4138 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4139 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4140 PetscInt len,proc,*dnz,*onz; 4141 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4142 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4143 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4144 MPI_Status *status; 4145 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4146 PetscBT lnkbt; 4147 Mat_Merge_SeqsToMPI *merge; 4148 PetscContainer container; 4149 4150 PetscFunctionBegin; 4151 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4152 4153 /* make sure it is a PETSc comm */ 4154 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4155 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4156 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4157 4158 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4159 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4160 4161 /* determine row ownership */ 4162 /*---------------------------------------------------------*/ 4163 ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 4164 merge->rowmap.n = m; 4165 merge->rowmap.N = M; 4166 merge->rowmap.bs = 1; 4167 ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr); 4168 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4169 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4170 4171 m = merge->rowmap.n; 4172 M = merge->rowmap.N; 4173 owners = merge->rowmap.range; 4174 4175 /* determine the number of messages to send, their lengths */ 4176 /*---------------------------------------------------------*/ 4177 len_s = merge->len_s; 4178 4179 len = 0; /* length of buf_si[] */ 4180 merge->nsend = 0; 4181 for (proc=0; proc<size; proc++){ 4182 len_si[proc] = 0; 4183 if (proc == rank){ 4184 len_s[proc] = 0; 4185 } else { 4186 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4187 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4188 } 4189 if (len_s[proc]) { 4190 merge->nsend++; 4191 nrows = 0; 4192 for (i=owners[proc]; i<owners[proc+1]; i++){ 4193 if (ai[i+1] > ai[i]) nrows++; 4194 } 4195 len_si[proc] = 2*(nrows+1); 4196 len += 
len_si[proc]; 4197 } 4198 } 4199 4200 /* determine the number and length of messages to receive for ij-structure */ 4201 /*-------------------------------------------------------------------------*/ 4202 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4203 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4204 4205 /* post the Irecv of j-structure */ 4206 /*-------------------------------*/ 4207 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4208 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4209 4210 /* post the Isend of j-structure */ 4211 /*--------------------------------*/ 4212 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 4213 sj_waits = si_waits + merge->nsend; 4214 4215 for (proc=0, k=0; proc<size; proc++){ 4216 if (!len_s[proc]) continue; 4217 i = owners[proc]; 4218 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4219 k++; 4220 } 4221 4222 /* receives and sends of j-structure are complete */ 4223 /*------------------------------------------------*/ 4224 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4225 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4226 4227 /* send and recv i-structure */ 4228 /*---------------------------*/ 4229 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4230 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4231 4232 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4233 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4234 for (proc=0,k=0; proc<size; proc++){ 4235 if (!len_s[proc]) continue; 4236 /* form outgoing message for i-structure: 4237 buf_si[0]: nrows to be sent 4238 [1:nrows]: row index (global) 4239 [nrows+1:2*nrows+1]: i-structure index 4240 */ 4241 /*-------------------------------------------*/ 4242 nrows = len_si[proc]/2 - 1; 4243 buf_si_i = buf_si + nrows+1; 4244 buf_si[0] = nrows; 4245 buf_si_i[0] = 0; 4246 nrows = 0; 4247 for (i=owners[proc]; i<owners[proc+1]; i++){ 4248 anzi = ai[i+1] - ai[i]; 4249 if (anzi) { 4250 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4251 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4252 nrows++; 4253 } 4254 } 4255 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4256 k++; 4257 buf_si += len_si[proc]; 4258 } 4259 4260 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4261 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4262 4263 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4264 for (i=0; i<merge->nrecv; i++){ 4265 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4266 } 4267 4268 ierr = PetscFree(len_si);CHKERRQ(ierr); 4269 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4270 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4271 ierr = PetscFree(si_waits);CHKERRQ(ierr); 4272 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4273 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4274 ierr = PetscFree(status);CHKERRQ(ierr); 4275 4276 /* compute a local seq matrix in each processor */ 4277 /*----------------------------------------------*/ 4278 /* allocate bi array and 
free space for accumulating nonzero column info */ 4279 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4280 bi[0] = 0; 4281 4282 /* create and initialize a linked list */ 4283 nlnk = N+1; 4284 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4285 4286 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4287 len = 0; 4288 len = ai[owners[rank+1]] - ai[owners[rank]]; 4289 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4290 current_space = free_space; 4291 4292 /* determine symbolic info for each local row */ 4293 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 4294 nextrow = buf_ri_k + merge->nrecv; 4295 nextai = nextrow + merge->nrecv; 4296 for (k=0; k<merge->nrecv; k++){ 4297 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4298 nrows = *buf_ri_k[k]; 4299 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4300 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure */ 4301 } 4302 4303 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4304 len = 0; 4305 for (i=0;i<m;i++) { 4306 bnzi = 0; 4307 /* add local non-zero cols of this proc's seqmat into lnk */ 4308 arow = owners[rank] + i; 4309 anzi = ai[arow+1] - ai[arow]; 4310 aj = a->j + ai[arow]; 4311 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4312 bnzi += nlnk; 4313 /* add received col data into lnk */ 4314 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4315 if (i == *nextrow[k]) { /* i-th row */ 4316 anzi = *(nextai[k]+1) - *nextai[k]; 4317 aj = buf_rj[k] + *nextai[k]; 4318 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4319 bnzi += nlnk; 4320 nextrow[k]++; nextai[k]++; 4321 } 4322 } 4323 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4324 4325 /* if free space is not available, make more free space */ 4326 if (current_space->local_remaining<bnzi) { 4327 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr); 4328 nspacedouble++; 4329 } 4330 /* copy data into free space, then initialize lnk */ 4331 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4332 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4333 4334 current_space->array += bnzi; 4335 current_space->local_used += bnzi; 4336 current_space->local_remaining -= bnzi; 4337 4338 bi[i+1] = bi[i] + bnzi; 4339 } 4340 4341 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 4342 4343 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4344 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4345 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4346 4347 /* create symbolic parallel matrix B_mpi */ 4348 /*---------------------------------------*/ 4349 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4350 if (n==PETSC_DECIDE) { 4351 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4352 } else { 4353 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4354 } 4355 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4356 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4357 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4358 4359 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4360 B_mpi->assembled = PETSC_FALSE; 4361 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4362 merge->bi = bi; 4363 merge->bj = bj; 4364 merge->buf_ri = buf_ri; 4365 merge->buf_rj =
buf_rj; 4366 merge->coi = PETSC_NULL; 4367 merge->coj = PETSC_NULL; 4368 merge->owners_co = PETSC_NULL; 4369 4370 /* attach the supporting struct to B_mpi for reuse */ 4371 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4372 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4373 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4374 *mpimat = B_mpi; 4375 4376 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4377 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4378 PetscFunctionReturn(0); 4379 } 4380 4381 #undef __FUNCT__ 4382 #define __FUNCT__ "MatMerge_SeqsToMPI" 4383 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4384 { 4385 PetscErrorCode ierr; 4386 4387 PetscFunctionBegin; 4388 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4389 if (scall == MAT_INITIAL_MATRIX){ 4390 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4391 } 4392 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4393 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4394 PetscFunctionReturn(0); 4395 } 4396 4397 #undef __FUNCT__ 4398 #define __FUNCT__ "MatGetLocalMat" 4399 /*@ 4400 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4401 4402 Not Collective 4403 4404 Input Parameters: 4405 + A - the matrix 4406 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4407 4408 Output Parameter: 4409 . A_loc - the local sequential matrix generated 4410 4411 Level: developer 4412 4413 @*/ 4414 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4415 { 4416 PetscErrorCode ierr; 4417 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4418 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4419 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4420 MatScalar *aa=a->a,*ba=b->a,*cam; 4421 PetscScalar *ca; 4422 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4423 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4424 4425 PetscFunctionBegin; 4426 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4427 if (scall == MAT_INITIAL_MATRIX){ 4428 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4429 ci[0] = 0; 4430 for (i=0; i<am; i++){ 4431 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4432 } 4433 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4434 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4435 k = 0; 4436 for (i=0; i<am; i++) { 4437 ncols_o = bi[i+1] - bi[i]; 4438 ncols_d = ai[i+1] - ai[i]; 4439 /* off-diagonal portion of A */ 4440 for (jo=0; jo<ncols_o; jo++) { 4441 col = cmap[*bj]; 4442 if (col >= cstart) break; 4443 cj[k] = col; bj++; 4444 ca[k++] = *ba++; 4445 } 4446 /* diagonal portion of A */ 4447 for (j=0; j<ncols_d; j++) { 4448 cj[k] = cstart + *aj++; 4449 ca[k++] = *aa++; 4450 } 4451 /* off-diagonal portion of A */ 4452 for (j=jo; j<ncols_o; j++) { 4453 cj[k] = cmap[*bj++]; 4454 ca[k++] = *ba++; 4455 } 4456 } 4457 /* put together the new matrix */ 4458 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4459 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4460 /* Since these are PETSc arrays, change flags to free them as necessary. 
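       In other words, MatDestroy() on *A_loc should then free ci, cj, and ca, which were allocated
       above rather than supplied by a user.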
*/ 4461 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4462 mat->free_a = PETSC_TRUE; 4463 mat->free_ij = PETSC_TRUE; 4464 mat->nonew = 0; 4465 } else if (scall == MAT_REUSE_MATRIX){ 4466 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4467 ci = mat->i; cj = mat->j; cam = mat->a; 4468 for (i=0; i<am; i++) { 4469 /* off-diagonal portion of A */ 4470 ncols_o = bi[i+1] - bi[i]; 4471 for (jo=0; jo<ncols_o; jo++) { 4472 col = cmap[*bj]; 4473 if (col >= cstart) break; 4474 *cam++ = *ba++; bj++; 4475 } 4476 /* diagonal portion of A */ 4477 ncols_d = ai[i+1] - ai[i]; 4478 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4479 /* off-diagonal portion of A */ 4480 for (j=jo; j<ncols_o; j++) { 4481 *cam++ = *ba++; bj++; 4482 } 4483 } 4484 } else { 4485 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4486 } 4487 4488 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4489 PetscFunctionReturn(0); 4490 } 4491 4492 #undef __FUNCT__ 4493 #define __FUNCT__ "MatGetLocalMatCondensed" 4494 /*@C 4495 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4496 4497 Not Collective 4498 4499 Input Parameters: 4500 + A - the matrix 4501 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4502 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4503 4504 Output Parameter: 4505 . A_loc - the local sequential matrix generated 4506 4507 Level: developer 4508 4509 @*/ 4510 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4511 { 4512 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4513 PetscErrorCode ierr; 4514 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4515 IS isrowa,iscola; 4516 Mat *aloc; 4517 4518 PetscFunctionBegin; 4519 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4520 if (!row){ 4521 start = A->rmap->rstart; end = A->rmap->rend; 4522 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4523 } else { 4524 isrowa = *row; 4525 } 4526 if (!col){ 4527 start = A->cmap->rstart; 4528 cmap = a->garray; 4529 nzA = a->A->cmap->n; 4530 nzB = a->B->cmap->n; 4531 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4532 ncols = 0; 4533 for (i=0; i<nzB; i++) { 4534 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4535 else break; 4536 } 4537 imark = i; 4538 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4539 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4540 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4541 ierr = PetscFree(idx);CHKERRQ(ierr); 4542 } else { 4543 iscola = *col; 4544 } 4545 if (scall != MAT_INITIAL_MATRIX){ 4546 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4547 aloc[0] = *A_loc; 4548 } 4549 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4550 *A_loc = aloc[0]; 4551 ierr = PetscFree(aloc);CHKERRQ(ierr); 4552 if (!row){ 4553 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4554 } 4555 if (!col){ 4556 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4557 } 4558 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4559 PetscFunctionReturn(0); 4560 } 4561 4562 #undef __FUNCT__ 4563 #define __FUNCT__ "MatGetBrowsOfAcols" 4564 /*@C 4565 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4566 4567 Collective on Mat 4568 4569 Input Parameters: 4570 + A,B - the matrices in mpiaij format 4571 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4572 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4573 4574 Output Parameter: 4575 + rowb, colb - index sets of rows and columns of B to extract 4576 . brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows 4577 - B_seq - the sequential matrix generated 4578 4579 Level: developer 4580 4581 @*/ 4582 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4583 { 4584 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4585 PetscErrorCode ierr; 4586 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4587 IS isrowb,iscolb; 4588 Mat *bseq; 4589 4590 PetscFunctionBegin; 4591 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4592 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4593 } 4594 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4595 4596 if (scall == MAT_INITIAL_MATRIX){ 4597 start = A->cmap->rstart; 4598 cmap = a->garray; 4599 nzA = a->A->cmap->n; 4600 nzB = a->B->cmap->n; 4601 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4602 ncols = 0; 4603 for (i=0; i<nzB; i++) { /* row < local row index */ 4604 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4605 else break; 4606 } 4607 imark = i; 4608 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4609 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4610 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4611 ierr = PetscFree(idx);CHKERRQ(ierr); 4612 *brstart = imark; 4613 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4614 } else { 4615 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4616 isrowb = *rowb; iscolb = *colb; 4617 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4618 bseq[0] = *B_seq; 4619 } 4620 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4621 *B_seq = bseq[0]; 4622 ierr = PetscFree(bseq);CHKERRQ(ierr); 4623 if (!rowb){ 4624 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4625 } else { 4626 *rowb = isrowb; 4627 } 4628 if (!colb){ 4629 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4630 } else { 4631 *colb = iscolb; 4632 } 4633 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4634 PetscFunctionReturn(0); 4635 } 4636 4637 #undef __FUNCT__ 4638 #define __FUNCT__ "MatGetBrowsOfAoCols" 4639 /*@C 4640 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4641 of the OFF-DIAGONAL portion of local A 4642 4643 Collective on Mat 4644 4645 Input Parameters: 4646 + A,B - the matrices in mpiaij format 4647 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4648 . 
startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4649 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4650 4651 Output Parameter: 4652 + B_oth - the sequential matrix generated 4653 4654 Level: developer 4655 4656 @*/ 4657 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth) 4658 { 4659 VecScatter_MPI_General *gen_to,*gen_from; 4660 PetscErrorCode ierr; 4661 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4662 Mat_SeqAIJ *b_oth; 4663 VecScatter ctx=a->Mvctx; 4664 MPI_Comm comm=((PetscObject)ctx)->comm; 4665 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4666 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4667 PetscScalar *rvalues,*svalues; 4668 MatScalar *b_otha,*bufa,*bufA; 4669 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4670 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4671 MPI_Status *sstatus,rstatus; 4672 PetscMPIInt jj; 4673 PetscInt *cols,sbs,rbs; 4674 PetscScalar *vals; 4675 4676 PetscFunctionBegin; 4677 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4678 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4679 } 4680 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4681 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4682 4683 gen_to = (VecScatter_MPI_General*)ctx->todata; 4684 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4685 rvalues = gen_from->values; /* holds the length of receiving row */ 4686 svalues = gen_to->values; /* holds the length of sending row */ 4687 nrecvs = gen_from->n; 4688 nsends = gen_to->n; 4689 4690 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4691 srow = gen_to->indices; /* local row index to be sent */ 4692 sstarts = gen_to->starts; 4693 sprocs = gen_to->procs; 4694 sstatus = gen_to->sstatus; 4695 sbs = gen_to->bs; 4696 rstarts = gen_from->starts; 4697 rprocs = gen_from->procs; 4698 rbs = gen_from->bs; 4699 4700 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4701 if (scall == MAT_INITIAL_MATRIX){ 4702 /* i-array */ 4703 /*---------*/ 4704 /* post receives */ 4705 for (i=0; i<nrecvs; i++){ 4706 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4707 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4708 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4709 } 4710 4711 /* pack the outgoing message */ 4712 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 4713 rstartsj = sstartsj + nsends +1; 4714 sstartsj[0] = 0; rstartsj[0] = 0; 4715 len = 0; /* total length of j or a array to be sent */ 4716 k = 0; 4717 for (i=0; i<nsends; i++){ 4718 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4719 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4720 for (j=0; j<nrows; j++) { 4721 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4722 for (l=0; l<sbs; l++){ 4723 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4724 rowlen[j*sbs+l] = ncols; 4725 len += ncols; 4726 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4727 } 4728 k++; 4729 } 4730 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4731 
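/* note: rowlen points into svalues; because MPI_Isend() is nonblocking, that buffer must not be reused until the MPI_Waitall() on swaits below has completed these sends */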
sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4732 } 4733 /* recvs and sends of i-array are completed */ 4734 i = nrecvs; 4735 while (i--) { 4736 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4737 } 4738 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4739 4740 /* allocate buffers for sending j and a arrays */ 4741 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4742 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4743 4744 /* create i-array of B_oth */ 4745 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4746 b_othi[0] = 0; 4747 len = 0; /* total length of j or a array to be received */ 4748 k = 0; 4749 for (i=0; i<nrecvs; i++){ 4750 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4751 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 4752 for (j=0; j<nrows; j++) { 4753 b_othi[k+1] = b_othi[k] + rowlen[j]; 4754 len += rowlen[j]; k++; 4755 } 4756 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4757 } 4758 4759 /* allocate space for j and a arrays of B_oth */ 4760 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4761 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 4762 4763 /* j-array */ 4764 /*---------*/ 4765 /* post receives of j-array */ 4766 for (i=0; i<nrecvs; i++){ 4767 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4768 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4769 } 4770 4771 /* pack the outgoing message j-array */ 4772 k = 0; 4773 for (i=0; i<nsends; i++){ 4774 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4775 bufJ = bufj+sstartsj[i]; 4776 for (j=0; j<nrows; j++) { 4777 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4778 for (ll=0; ll<sbs; ll++){ 4779 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4780 for (l=0; l<ncols; l++){ 4781 *bufJ++ = cols[l]; 4782 } 4783 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4784 } 4785 } 4786 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4787 } 4788 4789 /* recvs and sends of j-array are completed */ 4790 i = nrecvs; 4791 while (i--) { 4792 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4793 } 4794 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4795 } else if (scall == MAT_REUSE_MATRIX){ 4796 sstartsj = *startsj; 4797 rstartsj = sstartsj + nsends +1; 4798 bufa = *bufa_ptr; 4799 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4800 b_otha = b_oth->a; 4801 } else { 4802 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Invalid MatReuse: must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX"); 4803 } 4804 4805 /* a-array */ 4806 /*---------*/ 4807 /* post receives of a-array */ 4808 for (i=0; i<nrecvs; i++){ 4809 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4810 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4811 } 4812 4813 /* pack the outgoing message a-array */ 4814 k = 0; 4815 for (i=0; i<nsends; i++){ 4816 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4817 bufA = bufa+sstartsj[i]; 4818 for (j=0; j<nrows; j++) { 4819 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4820 for (ll=0; ll<sbs; ll++){ 4821 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4822 for (l=0; l<ncols; l++){ 
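/* values are packed in exactly the same order as the column indices placed into bufj above, so each receiving process can store them directly into b_otha */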
4823 *bufA++ = vals[l]; 4824 } 4825 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4826 } 4827 } 4828 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4829 } 4830 /* recvs and sends of a-array are completed */ 4831 i = nrecvs; 4832 while (i--) { 4833 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4834 } 4835 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4836 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4837 4838 if (scall == MAT_INITIAL_MATRIX){ 4839 /* put together the new matrix */ 4840 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4841 4842 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4843 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4844 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4845 b_oth->free_a = PETSC_TRUE; 4846 b_oth->free_ij = PETSC_TRUE; 4847 b_oth->nonew = 0; 4848 4849 ierr = PetscFree(bufj);CHKERRQ(ierr); 4850 if (!startsj || !bufa_ptr){ 4851 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 4852 ierr = PetscFree(bufa);CHKERRQ(ierr); 4853 } else { 4854 *startsj = sstartsj; 4855 *bufa_ptr = bufa; 4856 } 4857 } 4858 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4859 PetscFunctionReturn(0); 4860 } 4861 4862 #undef __FUNCT__ 4863 #define __FUNCT__ "MatGetCommunicationStructs" 4864 /*@C 4865 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4866 4867 Not Collective 4868 4869 Input Parameter: 4870 . A - The matrix in mpiaij format 4871 4872 Output Parameters: 4873 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4874 . 
colmap - A map from global column index to local index into lvec 4875 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4876 4877 Level: developer 4878 4879 @*/ 4880 #if defined (PETSC_USE_CTABLE) 4881 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4882 #else 4883 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4884 #endif 4885 { 4886 Mat_MPIAIJ *a; 4887 4888 PetscFunctionBegin; 4889 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 4890 PetscValidPointer(lvec, 2) 4891 PetscValidPointer(colmap, 3) 4892 PetscValidPointer(multScatter, 4) 4893 a = (Mat_MPIAIJ *) A->data; 4894 if (lvec) *lvec = a->lvec; 4895 if (colmap) *colmap = a->colmap; 4896 if (multScatter) *multScatter = a->Mvctx; 4897 PetscFunctionReturn(0); 4898 } 4899 4900 EXTERN_C_BEGIN 4901 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*); 4902 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*); 4903 EXTERN_C_END 4904 4905 #include "../src/mat/impls/dense/mpi/mpidense.h" 4906 4907 #undef __FUNCT__ 4908 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4909 /* 4910 Computes (B'*A')' since computing B*A directly is untenable 4911 4912 n p p 4913 ( ) ( ) ( ) 4914 m ( A ) * n ( B ) = m ( C ) 4915 ( ) ( ) ( ) 4916 4917 */ 4918 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4919 { 4920 PetscErrorCode ierr; 4921 Mat At,Bt,Ct; 4922 4923 PetscFunctionBegin; 4924 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4925 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4926 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4927 ierr = MatDestroy(At);CHKERRQ(ierr); 4928 ierr = MatDestroy(Bt);CHKERRQ(ierr); 4929 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4930 ierr = MatDestroy(Ct);CHKERRQ(ierr); 4931 PetscFunctionReturn(0); 4932 } 4933 4934 #undef __FUNCT__ 4935 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4936 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4937 { 4938 PetscErrorCode ierr; 4939 PetscInt m=A->rmap->n,n=B->cmap->n; 4940 Mat Cmat; 4941 4942 PetscFunctionBegin; 4943 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 4944 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 4945 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4946 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 4947 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 4948 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4949 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4950 *C = Cmat; 4951 PetscFunctionReturn(0); 4952 } 4953 4954 /* ----------------------------------------------------------------*/ 4955 #undef __FUNCT__ 4956 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 4957 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 4958 { 4959 PetscErrorCode ierr; 4960 4961 PetscFunctionBegin; 4962 if (scall == MAT_INITIAL_MATRIX){ 4963 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 4964 } 4965 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 4966 PetscFunctionReturn(0); 4967 } 4968 4969 EXTERN_C_BEGIN 4970 #if defined(PETSC_HAVE_MUMPS) 4971 extern 
PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*); 4972 #endif 4973 #if defined(PETSC_HAVE_PASTIX) 4974 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 4975 #endif 4976 #if defined(PETSC_HAVE_SUPERLU_DIST) 4977 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 4978 #endif 4979 #if defined(PETSC_HAVE_SPOOLES) 4980 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 4981 #endif 4982 EXTERN_C_END 4983 4984 /*MC 4985 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 4986 4987 Options Database Keys: 4988 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 4989 4990 Level: beginner 4991 4992 .seealso: MatCreateMPIAIJ() 4993 M*/ 4994 4995 EXTERN_C_BEGIN 4996 #undef __FUNCT__ 4997 #define __FUNCT__ "MatCreate_MPIAIJ" 4998 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 4999 { 5000 Mat_MPIAIJ *b; 5001 PetscErrorCode ierr; 5002 PetscMPIInt size; 5003 5004 PetscFunctionBegin; 5005 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5006 5007 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5008 B->data = (void*)b; 5009 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5010 B->rmap->bs = 1; 5011 B->assembled = PETSC_FALSE; 5012 B->mapping = 0; 5013 5014 B->insertmode = NOT_SET_VALUES; 5015 b->size = size; 5016 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5017 5018 /* build cache for off array entries formed */ 5019 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5020 b->donotstash = PETSC_FALSE; 5021 b->colmap = 0; 5022 b->garray = 0; 5023 b->roworiented = PETSC_TRUE; 5024 5025 /* stuff used for matrix vector multiply */ 5026 b->lvec = PETSC_NULL; 5027 b->Mvctx = PETSC_NULL; 5028 5029 /* stuff for MatGetRow() */ 5030 b->rowindices = 0; 5031 b->rowvalues = 0; 5032 b->getrowactive = PETSC_FALSE; 5033 5034 #if defined(PETSC_HAVE_SPOOLES) 5035 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_spooles_C", 5036 "MatGetFactor_mpiaij_spooles", 5037 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5038 #endif 5039 #if defined(PETSC_HAVE_MUMPS) 5040 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_mumps_C", 5041 "MatGetFactor_mpiaij_mumps", 5042 MatGetFactor_mpiaij_mumps);CHKERRQ(ierr); 5043 #endif 5044 #if defined(PETSC_HAVE_PASTIX) 5045 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_pastix_C", 5046 "MatGetFactor_mpiaij_pastix", 5047 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5048 #endif 5049 #if defined(PETSC_HAVE_SUPERLU_DIST) 5050 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_superlu_dist_C", 5051 "MatGetFactor_mpiaij_superlu_dist", 5052 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5053 #endif 5054 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5055 "MatStoreValues_MPIAIJ", 5056 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5057 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5058 "MatRetrieveValues_MPIAIJ", 5059 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5060 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5061 "MatGetDiagonalBlock_MPIAIJ", 5062 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5063 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5064 "MatIsTranspose_MPIAIJ", 5065 
MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5066 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5067 "MatMPIAIJSetPreallocation_MPIAIJ", 5068 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5069 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5070 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5071 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5072 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5073 "MatDiagonalScaleLocal_MPIAIJ", 5074 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5075 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C", 5076 "MatConvert_MPIAIJ_MPICSRPERM", 5077 MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr); 5078 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C", 5079 "MatConvert_MPIAIJ_MPICRL", 5080 MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr); 5081 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5082 "MatMatMult_MPIDense_MPIAIJ", 5083 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5084 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5085 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5086 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5087 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5088 "MatMatMultNumeric_MPIDense_MPIAIJ", 5089 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5090 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5091 PetscFunctionReturn(0); 5092 } 5093 EXTERN_C_END 5094 5095 #undef __FUNCT__ 5096 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5097 /*@ 5098 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5099 and "off-diagonal" part of the matrix in CSR format. 5100 5101 Collective on MPI_Comm 5102 5103 Input Parameters: 5104 + comm - MPI communicator 5105 . m - number of local rows (Cannot be PETSC_DECIDE) 5106 . n - This value should be the same as the local size used in creating the 5107 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5108 calculated if N is given) For square matrices n is almost always m. 5109 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5110 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5111 . i - row indices for "diagonal" portion of matrix 5112 . j - column indices 5113 . a - matrix values 5114 . oi - row indices for "off-diagonal" portion of matrix 5115 . oj - column indices 5116 - oa - matrix values 5117 5118 Output Parameter: 5119 . mat - the matrix 5120 5121 Level: advanced 5122 5123 Notes: 5124 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 
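   Because the arrays are passed unchanged to MatCreateSeqAIJWithArrays() for the two blocks, they must remain valid for the lifetime of the matrix and are not freed when the matrix is destroyed.

   For the same reason, the j indices are in the local column numbering of the "diagonal" block (0 to n-1), while the oj indices are global column numbers (0 to N-1).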
5125 5126 The i and j indices are 0 based 5127 5128 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5129 5130 5131 .keywords: matrix, aij, compressed row, sparse, parallel 5132 5133 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5134 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5135 @*/ 5136 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5137 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5138 { 5139 PetscErrorCode ierr; 5140 Mat_MPIAIJ *maij; 5141 5142 PetscFunctionBegin; 5143 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5144 if (i[0]) { 5145 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5146 } 5147 if (oi[0]) { 5148 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5149 } 5150 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5151 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5152 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5153 maij = (Mat_MPIAIJ*) (*mat)->data; 5154 maij->donotstash = PETSC_TRUE; 5155 (*mat)->preallocated = PETSC_TRUE; 5156 5157 ierr = PetscMapSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5158 ierr = PetscMapSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5159 ierr = PetscMapSetUp((*mat)->rmap);CHKERRQ(ierr); 5160 ierr = PetscMapSetUp((*mat)->cmap);CHKERRQ(ierr); 5161 5162 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5163 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5164 5165 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5166 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5167 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5168 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5169 5170 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5171 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5172 PetscFunctionReturn(0); 5173 } 5174 5175 /* 5176 Special version for direct calls from Fortran 5177 */ 5178 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5179 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5180 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5181 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5182 #endif 5183 5184 /* Change these macros so can be used in void function */ 5185 #undef CHKERRQ 5186 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5187 #undef SETERRQ2 5188 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5189 #undef SETERRQ 5190 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5191 5192 EXTERN_C_BEGIN 5193 #undef __FUNCT__ 5194 #define __FUNCT__ "matsetvaluesmpiaij_" 5195 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5196 { 5197 Mat mat = *mmat; 5198 PetscInt m = *mm, n = *mn; 5199 InsertMode addv = *maddv; 5200 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5201 PetscScalar value; 5202 PetscErrorCode ierr; 5203 5204 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5205 if (mat->insertmode == NOT_SET_VALUES) { 5206 mat->insertmode = addv; 5207 } 5208 #if defined(PETSC_USE_DEBUG) 5209 else if (mat->insertmode != addv) { 5210 
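/* once an insert mode has been chosen it cannot change until the matrix has been assembled */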
SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5211 } 5212 #endif 5213 { 5214 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5215 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5216 PetscTruth roworiented = aij->roworiented; 5217 5218 /* Some Variables required in the macro */ 5219 Mat A = aij->A; 5220 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5221 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5222 MatScalar *aa = a->a; 5223 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5224 Mat B = aij->B; 5225 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5226 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5227 MatScalar *ba = b->a; 5228 5229 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5230 PetscInt nonew = a->nonew; 5231 MatScalar *ap1,*ap2; 5232 5233 PetscFunctionBegin; 5234 for (i=0; i<m; i++) { 5235 if (im[i] < 0) continue; 5236 #if defined(PETSC_USE_DEBUG) 5237 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5238 #endif 5239 if (im[i] >= rstart && im[i] < rend) { 5240 row = im[i] - rstart; 5241 lastcol1 = -1; 5242 rp1 = aj + ai[row]; 5243 ap1 = aa + ai[row]; 5244 rmax1 = aimax[row]; 5245 nrow1 = ailen[row]; 5246 low1 = 0; 5247 high1 = nrow1; 5248 lastcol2 = -1; 5249 rp2 = bj + bi[row]; 5250 ap2 = ba + bi[row]; 5251 rmax2 = bimax[row]; 5252 nrow2 = bilen[row]; 5253 low2 = 0; 5254 high2 = nrow2; 5255 5256 for (j=0; j<n; j++) { 5257 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5258 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5259 if (in[j] >= cstart && in[j] < cend){ 5260 col = in[j] - cstart; 5261 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5262 } else if (in[j] < 0) continue; 5263 #if defined(PETSC_USE_DEBUG) 5264 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 5265 #endif 5266 else { 5267 if (mat->was_assembled) { 5268 if (!aij->colmap) { 5269 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5270 } 5271 #if defined (PETSC_USE_CTABLE) 5272 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5273 col--; 5274 #else 5275 col = aij->colmap[in[j]] - 1; 5276 #endif 5277 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5278 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5279 col = in[j]; 5280 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5281 B = aij->B; 5282 b = (Mat_SeqAIJ*)B->data; 5283 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5284 rp2 = bj + bi[row]; 5285 ap2 = ba + bi[row]; 5286 rmax2 = bimax[row]; 5287 nrow2 = bilen[row]; 5288 low2 = 0; 5289 high2 = nrow2; 5290 bm = aij->B->rmap->n; 5291 ba = b->a; 5292 } 5293 } else col = in[j]; 5294 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5295 } 5296 } 5297 } else { 5298 if (!aij->donotstash) { 5299 if (roworiented) { 5300 if (ignorezeroentries && v[i*n] == 0.0) continue; 5301 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 5302 } else { 5303 if (ignorezeroentries && v[i] == 0.0) continue; 5304 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 5305 } 5306 } 5307 } 5308 }} 5309 PetscFunctionReturnVoid(); 5310 } 5311 EXTERN_C_END 5312 5313