1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "MatDistribute_MPIAIJ" 7 /* 8 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 9 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 10 11 Only for square matrices 12 */ 13 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 14 { 15 PetscMPIInt rank,size; 16 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 17 PetscErrorCode ierr; 18 Mat mat; 19 Mat_SeqAIJ *gmata; 20 PetscMPIInt tag; 21 MPI_Status status; 22 PetscTruth aij; 23 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 24 25 PetscFunctionBegin; 26 CHKMEMQ; 27 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 28 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 29 if (!rank) { 30 ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 31 if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 32 } 33 if (reuse == MAT_INITIAL_MATRIX) { 34 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 35 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 36 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 37 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 38 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 39 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 40 rowners[0] = 0; 41 for (i=2; i<=size; i++) { 42 rowners[i] += rowners[i-1]; 43 } 44 rstart = rowners[rank]; 45 rend = rowners[rank+1]; 46 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 47 if (!rank) { 48 gmata = (Mat_SeqAIJ*) gmat->data; 49 /* send row lengths to all processors */ 50 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 51 for (i=1; i<size; i++) { 52 ierr = MPI_Send(gmata->ilen + 
rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 53 } 54 /* determine number diagonal and off-diagonal counts */ 55 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 56 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 57 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 58 jj = 0; 59 for (i=0; i<m; i++) { 60 for (j=0; j<dlens[i]; j++) { 61 if (gmata->j[jj] < rstart) ld[i]++; 62 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 63 jj++; 64 } 65 } 66 /* send column indices to other processes */ 67 for (i=1; i<size; i++) { 68 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 69 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 70 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 71 } 72 73 /* send numerical values to other processes */ 74 for (i=1; i<size; i++) { 75 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 76 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 77 } 78 gmataa = gmata->a; 79 gmataj = gmata->j; 80 81 } else { 82 /* receive row lengths */ 83 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 84 /* receive column indices */ 85 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 86 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 87 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 88 /* determine number diagonal and off-diagonal counts */ 89 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 90 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 91 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 92 jj = 0; 93 for (i=0; i<m; i++) { 94 for (j=0; j<dlens[i]; j++) { 95 if (gmataj[jj] < rstart) ld[i]++; 96 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 97 jj++; 98 } 99 } 100 /* receive numerical values */ 101 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = 
MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 103 } 104 /* set preallocation */ 105 for (i=0; i<m; i++) { 106 dlens[i] -= olens[i]; 107 } 108 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 109 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 110 111 for (i=0; i<m; i++) { 112 dlens[i] += olens[i]; 113 } 114 cnt = 0; 115 for (i=0; i<m; i++) { 116 row = rstart + i; 117 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 118 cnt += dlens[i]; 119 } 120 if (rank) { 121 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 122 } 123 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 124 ierr = PetscFree(rowners);CHKERRQ(ierr); 125 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 126 *inmat = mat; 127 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 128 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 129 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 130 mat = *inmat; 131 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 132 if (!rank) { 133 /* send numerical values to other processes */ 134 gmata = (Mat_SeqAIJ*) gmat->data; 135 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 136 gmataa = gmata->a; 137 for (i=1; i<size; i++) { 138 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 139 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 140 } 141 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 142 } else { 143 /* receive numerical values from process 0*/ 144 nz = Ad->nz + Ao->nz; 145 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 146 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 147 } 148 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 149 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 150 ad = Ad->a; 151 ao = Ao->a; 152 if 
(mat->rmap->n) { 153 i = 0; 154 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 155 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 156 } 157 for (i=1; i<mat->rmap->n; i++) { 158 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 159 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 160 } 161 i--; 162 if (mat->rmap->n) { 163 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 164 } 165 if (rank) { 166 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 167 } 168 } 169 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 170 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 171 CHKMEMQ; 172 PetscFunctionReturn(0); 173 } 174 175 /* 176 Local utility routine that creates a mapping from the global column 177 number to the local number in the off-diagonal part of the local 178 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 179 a slightly higher hash table cost; without it it is not scalable (each processor 180 has an order N integer array but is fast to acess. 
181 */ 182 #undef __FUNCT__ 183 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 184 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 185 { 186 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 187 PetscErrorCode ierr; 188 PetscInt n = aij->B->cmap->n,i; 189 190 PetscFunctionBegin; 191 #if defined (PETSC_USE_CTABLE) 192 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 193 for (i=0; i<n; i++){ 194 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 195 } 196 #else 197 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 198 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 199 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 200 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 201 #endif 202 PetscFunctionReturn(0); 203 } 204 205 206 #define CHUNKSIZE 15 207 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 208 { \ 209 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 210 lastcol1 = col;\ 211 while (high1-low1 > 5) { \ 212 t = (low1+high1)/2; \ 213 if (rp1[t] > col) high1 = t; \ 214 else low1 = t; \ 215 } \ 216 for (_i=low1; _i<high1; _i++) { \ 217 if (rp1[_i] > col) break; \ 218 if (rp1[_i] == col) { \ 219 if (addv == ADD_VALUES) ap1[_i] += value; \ 220 else ap1[_i] = value; \ 221 goto a_noinsert; \ 222 } \ 223 } \ 224 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 225 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 226 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 227 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 228 N = nrow1++ - 1; a->nz++; high1++; \ 229 /* shift up all the later entries in this row */ \ 230 for (ii=N; ii>=_i; ii--) { \ 231 rp1[ii+1] = rp1[ii]; \ 232 ap1[ii+1] = ap1[ii]; \ 233 } \ 234 rp1[_i] = col; \ 235 ap1[_i] = value; \ 236 a_noinsert: ; \ 237 ailen[row] = 
nrow1; \ 238 } 239 240 241 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 242 { \ 243 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 244 lastcol2 = col;\ 245 while (high2-low2 > 5) { \ 246 t = (low2+high2)/2; \ 247 if (rp2[t] > col) high2 = t; \ 248 else low2 = t; \ 249 } \ 250 for (_i=low2; _i<high2; _i++) { \ 251 if (rp2[_i] > col) break; \ 252 if (rp2[_i] == col) { \ 253 if (addv == ADD_VALUES) ap2[_i] += value; \ 254 else ap2[_i] = value; \ 255 goto b_noinsert; \ 256 } \ 257 } \ 258 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 259 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 260 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 261 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 262 N = nrow2++ - 1; b->nz++; high2++; \ 263 /* shift up all the later entries in this row */ \ 264 for (ii=N; ii>=_i; ii--) { \ 265 rp2[ii+1] = rp2[ii]; \ 266 ap2[ii+1] = ap2[ii]; \ 267 } \ 268 rp2[_i] = col; \ 269 ap2[_i] = value; \ 270 b_noinsert: ; \ 271 bilen[row] = nrow2; \ 272 } 273 274 #undef __FUNCT__ 275 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 276 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 277 { 278 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 279 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 280 PetscErrorCode ierr; 281 PetscInt l,*garray = mat->garray,diag; 282 283 PetscFunctionBegin; 284 /* code only works for square matrices A */ 285 286 /* find size of row to the left of the diagonal part */ 287 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 288 row = row - diag; 289 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 290 if (garray[b->j[b->i[row]+l]] > diag) break; 291 } 292 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 293 294 /* diagonal part */ 295 ierr = 
PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 296 297 /* right of diagonal part */ 298 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 299 PetscFunctionReturn(0); 300 } 301 302 #undef __FUNCT__ 303 #define __FUNCT__ "MatSetValues_MPIAIJ" 304 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 305 { 306 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 307 PetscScalar value; 308 PetscErrorCode ierr; 309 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 310 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 311 PetscTruth roworiented = aij->roworiented; 312 313 /* Some Variables required in the macro */ 314 Mat A = aij->A; 315 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 316 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 317 MatScalar *aa = a->a; 318 PetscTruth ignorezeroentries = a->ignorezeroentries; 319 Mat B = aij->B; 320 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 321 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 322 MatScalar *ba = b->a; 323 324 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 325 PetscInt nonew = a->nonew; 326 MatScalar *ap1,*ap2; 327 328 PetscFunctionBegin; 329 for (i=0; i<m; i++) { 330 if (im[i] < 0) continue; 331 #if defined(PETSC_USE_DEBUG) 332 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 333 #endif 334 if (im[i] >= rstart && im[i] < rend) { 335 row = im[i] - rstart; 336 lastcol1 = -1; 337 rp1 = aj + ai[row]; 338 ap1 = aa + ai[row]; 339 rmax1 = aimax[row]; 340 nrow1 = ailen[row]; 341 low1 = 0; 342 high1 = nrow1; 343 lastcol2 = -1; 344 rp2 = bj + bi[row]; 345 ap2 = ba + bi[row]; 346 rmax2 = bimax[row]; 347 nrow2 = bilen[row]; 348 
low2 = 0; 349 high2 = nrow2; 350 351 for (j=0; j<n; j++) { 352 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 353 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 354 if (in[j] >= cstart && in[j] < cend){ 355 col = in[j] - cstart; 356 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 357 } else if (in[j] < 0) continue; 358 #if defined(PETSC_USE_DEBUG) 359 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 360 #endif 361 else { 362 if (mat->was_assembled) { 363 if (!aij->colmap) { 364 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 365 } 366 #if defined (PETSC_USE_CTABLE) 367 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 368 col--; 369 #else 370 col = aij->colmap[in[j]] - 1; 371 #endif 372 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 373 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 374 col = in[j]; 375 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 376 B = aij->B; 377 b = (Mat_SeqAIJ*)B->data; 378 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 379 rp2 = bj + bi[row]; 380 ap2 = ba + bi[row]; 381 rmax2 = bimax[row]; 382 nrow2 = bilen[row]; 383 low2 = 0; 384 high2 = nrow2; 385 bm = aij->B->rmap->n; 386 ba = b->a; 387 } 388 } else col = in[j]; 389 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 390 } 391 } 392 } else { 393 if (!aij->donotstash) { 394 if (roworiented) { 395 if (ignorezeroentries && v[i*n] == 0.0) continue; 396 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 397 } else { 398 if (ignorezeroentries && v[i] == 0.0) continue; 399 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 400 } 401 } 402 } 403 } 404 PetscFunctionReturn(0); 405 } 406 407 #undef __FUNCT__ 408 #define __FUNCT__ "MatGetValues_MPIAIJ" 409 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt 
idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 410 { 411 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 412 PetscErrorCode ierr; 413 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 414 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 415 416 PetscFunctionBegin; 417 for (i=0; i<m; i++) { 418 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 419 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 420 if (idxm[i] >= rstart && idxm[i] < rend) { 421 row = idxm[i] - rstart; 422 for (j=0; j<n; j++) { 423 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 424 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 425 if (idxn[j] >= cstart && idxn[j] < cend){ 426 col = idxn[j] - cstart; 427 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 428 } else { 429 if (!aij->colmap) { 430 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 431 } 432 #if defined (PETSC_USE_CTABLE) 433 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 434 col --; 435 #else 436 col = aij->colmap[idxn[j]] - 1; 437 #endif 438 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 439 else { 440 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 441 } 442 } 443 } 444 } else { 445 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 446 } 447 } 448 PetscFunctionReturn(0); 449 } 450 451 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 452 453 #undef __FUNCT__ 454 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 455 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 456 { 457 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 458 PetscErrorCode ierr; 459 PetscInt nstash,reallocs; 460 InsertMode addv; 461 462 PetscFunctionBegin; 463 if (aij->donotstash) { 464 
PetscFunctionReturn(0); 465 } 466 467 /* make sure all processors are either in INSERTMODE or ADDMODE */ 468 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 469 if (addv == (ADD_VALUES|INSERT_VALUES)) { 470 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 471 } 472 mat->insertmode = addv; /* in case this processor had no cache */ 473 474 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 475 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 476 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 477 PetscFunctionReturn(0); 478 } 479 480 #undef __FUNCT__ 481 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 482 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 483 { 484 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 485 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 486 PetscErrorCode ierr; 487 PetscMPIInt n; 488 PetscInt i,j,rstart,ncols,flg; 489 PetscInt *row,*col; 490 PetscTruth other_disassembled; 491 PetscScalar *val; 492 InsertMode addv = mat->insertmode; 493 494 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 495 PetscFunctionBegin; 496 if (!aij->donotstash) { 497 while (1) { 498 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 499 if (!flg) break; 500 501 for (i=0; i<n;) { 502 /* Now identify the consecutive vals belonging to the same row */ 503 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 504 if (j < n) ncols = j-i; 505 else ncols = n-i; 506 /* Now assemble all these values with a single function call */ 507 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 508 i = j; 509 } 510 } 511 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 512 } 513 a->compressedrow.use = PETSC_FALSE; 514 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 515 
ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 516 517 /* determine if any processor has disassembled, if so we must 518 also disassemble ourselfs, in order that we may reassemble. */ 519 /* 520 if nonzero structure of submatrix B cannot change then we know that 521 no processor disassembled thus we can skip this stuff 522 */ 523 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 524 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 525 if (mat->was_assembled && !other_disassembled) { 526 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 527 } 528 } 529 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 530 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 531 } 532 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 533 ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 534 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 535 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 536 537 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 538 aij->rowvalues = 0; 539 540 /* used by MatAXPY() */ 541 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 542 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 543 544 if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;} 545 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 546 PetscFunctionReturn(0); 547 } 548 549 #undef __FUNCT__ 550 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 551 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 552 { 553 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 554 PetscErrorCode ierr; 555 556 PetscFunctionBegin; 557 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 558 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 559 PetscFunctionReturn(0); 560 } 561 562 #undef __FUNCT__ 563 #define __FUNCT__ "MatZeroRows_MPIAIJ" 564 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 565 { 566 
Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 567 PetscErrorCode ierr; 568 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 569 PetscInt i,*owners = A->rmap->range; 570 PetscInt *nprocs,j,idx,nsends,row; 571 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 572 PetscInt *rvalues,count,base,slen,*source; 573 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 574 MPI_Comm comm = ((PetscObject)A)->comm; 575 MPI_Request *send_waits,*recv_waits; 576 MPI_Status recv_status,*send_status; 577 #if defined(PETSC_DEBUG) 578 PetscTruth found = PETSC_FALSE; 579 #endif 580 581 PetscFunctionBegin; 582 /* first count number of contributors to each processor */ 583 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 584 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 585 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 586 j = 0; 587 for (i=0; i<N; i++) { 588 if (lastidx > (idx = rows[i])) j = 0; 589 lastidx = idx; 590 for (; j<size; j++) { 591 if (idx >= owners[j] && idx < owners[j+1]) { 592 nprocs[2*j]++; 593 nprocs[2*j+1] = 1; 594 owner[i] = j; 595 #if defined(PETSC_DEBUG) 596 found = PETSC_TRUE; 597 #endif 598 break; 599 } 600 } 601 #if defined(PETSC_DEBUG) 602 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 603 found = PETSC_FALSE; 604 #endif 605 } 606 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 607 608 /* inform other processors of number of messages and max length*/ 609 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 610 611 /* post receives: */ 612 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 613 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 614 for (i=0; i<nrecvs; i++) { 615 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 616 } 617 618 /* do sends: 619 1) starts[i] gives the starting index in svalues for 
stuff going to 620 the ith processor 621 */ 622 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 623 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 624 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 625 starts[0] = 0; 626 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 627 for (i=0; i<N; i++) { 628 svalues[starts[owner[i]]++] = rows[i]; 629 } 630 631 starts[0] = 0; 632 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 633 count = 0; 634 for (i=0; i<size; i++) { 635 if (nprocs[2*i+1]) { 636 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 637 } 638 } 639 ierr = PetscFree(starts);CHKERRQ(ierr); 640 641 base = owners[rank]; 642 643 /* wait on receives */ 644 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 645 source = lens + nrecvs; 646 count = nrecvs; slen = 0; 647 while (count) { 648 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 649 /* unpack receives into our local space */ 650 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 651 source[imdex] = recv_status.MPI_SOURCE; 652 lens[imdex] = n; 653 slen += n; 654 count--; 655 } 656 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 657 658 /* move the data into the send scatter */ 659 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 660 count = 0; 661 for (i=0; i<nrecvs; i++) { 662 values = rvalues + i*nmax; 663 for (j=0; j<lens[i]; j++) { 664 lrows[count++] = values[j] - base; 665 } 666 } 667 ierr = PetscFree(rvalues);CHKERRQ(ierr); 668 ierr = PetscFree(lens);CHKERRQ(ierr); 669 ierr = PetscFree(owner);CHKERRQ(ierr); 670 ierr = PetscFree(nprocs);CHKERRQ(ierr); 671 672 /* actually zap the local rows */ 673 /* 674 Zero the required rows. 
If the "diagonal block" of the matrix 675 is square and the user wishes to set the diagonal we use separate 676 code so that MatSetValues() is not called for each diagonal allocating 677 new memory, thus calling lots of mallocs and slowing things down. 678 679 Contributed by: Matthew Knepley 680 */ 681 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 682 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 683 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 684 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 685 } else if (diag != 0.0) { 686 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 687 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 688 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 689 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 690 } 691 for (i = 0; i < slen; i++) { 692 row = lrows[i] + rstart; 693 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 694 } 695 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 696 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 697 } else { 698 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 699 } 700 ierr = PetscFree(lrows);CHKERRQ(ierr); 701 702 /* wait on sends */ 703 if (nsends) { 704 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 705 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 706 ierr = PetscFree(send_status);CHKERRQ(ierr); 707 } 708 ierr = PetscFree(send_waits);CHKERRQ(ierr); 709 ierr = PetscFree(svalues);CHKERRQ(ierr); 710 711 PetscFunctionReturn(0); 712 } 713 714 #undef __FUNCT__ 715 #define __FUNCT__ "MatMult_MPIAIJ" 716 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 717 { 718 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 719 PetscErrorCode ierr; 720 PetscInt nt; 721 722 PetscFunctionBegin; 723 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 724 if (nt != A->cmap->n) { 725 
SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 726 } 727 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 728 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 729 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 730 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 731 PetscFunctionReturn(0); 732 } 733 734 #undef __FUNCT__ 735 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 736 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 737 { 738 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 739 PetscErrorCode ierr; 740 741 PetscFunctionBegin; 742 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 743 PetscFunctionReturn(0); 744 } 745 746 #undef __FUNCT__ 747 #define __FUNCT__ "MatMultAdd_MPIAIJ" 748 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 749 { 750 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 751 PetscErrorCode ierr; 752 753 PetscFunctionBegin; 754 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 755 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 756 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 757 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 758 PetscFunctionReturn(0); 759 } 760 761 #undef __FUNCT__ 762 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 763 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 764 { 765 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 766 PetscErrorCode ierr; 767 PetscTruth merged; 768 769 PetscFunctionBegin; 770 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 771 /* do nondiagonal part */ 772 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 773 if (!merged) { 774 /* send it on its way */ 775 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 776 /* do local part */ 777 ierr = 
(*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 778 /* receive remote parts: note this assumes the values are not actually */ 779 /* added in yy until the next line, */ 780 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 781 } else { 782 /* do local part */ 783 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 784 /* send it on its way */ 785 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 786 /* values actually were received in the Begin() but we need to call this nop */ 787 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 788 } 789 PetscFunctionReturn(0); 790 } 791 792 EXTERN_C_BEGIN 793 #undef __FUNCT__ 794 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 795 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 796 { 797 MPI_Comm comm; 798 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 799 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 800 IS Me,Notme; 801 PetscErrorCode ierr; 802 PetscInt M,N,first,last,*notme,i; 803 PetscMPIInt size; 804 805 PetscFunctionBegin; 806 807 /* Easy test: symmetric diagonal block */ 808 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 809 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 810 if (!*f) PetscFunctionReturn(0); 811 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 812 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 813 if (size == 1) PetscFunctionReturn(0); 814 815 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. 
*/ 816 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 817 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 818 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 819 for (i=0; i<first; i++) notme[i] = i; 820 for (i=last; i<M; i++) notme[i-last+first] = i; 821 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 822 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 823 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 824 Aoff = Aoffs[0]; 825 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 826 Boff = Boffs[0]; 827 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 828 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 829 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 830 ierr = ISDestroy(Me);CHKERRQ(ierr); 831 ierr = ISDestroy(Notme);CHKERRQ(ierr); 832 833 PetscFunctionReturn(0); 834 } 835 EXTERN_C_END 836 837 #undef __FUNCT__ 838 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 839 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 840 { 841 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 842 PetscErrorCode ierr; 843 844 PetscFunctionBegin; 845 /* do nondiagonal part */ 846 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 847 /* send it on its way */ 848 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 849 /* do local part */ 850 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 851 /* receive remote parts */ 852 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 853 PetscFunctionReturn(0); 854 } 855 856 /* 857 This only works correctly for square matrices where the subblock A->A is the 858 diagonal block 859 */ 860 #undef __FUNCT__ 861 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 862 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 863 { 864 PetscErrorCode ierr; 865 Mat_MPIAIJ *a = 
(Mat_MPIAIJ*)A->data; 866 867 PetscFunctionBegin; 868 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 869 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) { 870 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 871 } 872 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 873 PetscFunctionReturn(0); 874 } 875 876 #undef __FUNCT__ 877 #define __FUNCT__ "MatScale_MPIAIJ" 878 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 879 { 880 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 881 PetscErrorCode ierr; 882 883 PetscFunctionBegin; 884 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 885 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 886 PetscFunctionReturn(0); 887 } 888 889 #undef __FUNCT__ 890 #define __FUNCT__ "MatDestroy_MPIAIJ" 891 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 892 { 893 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 894 PetscErrorCode ierr; 895 896 PetscFunctionBegin; 897 #if defined(PETSC_USE_LOG) 898 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 899 #endif 900 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 901 if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);} 902 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 903 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 904 #if defined (PETSC_USE_CTABLE) 905 if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);} 906 #else 907 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 908 #endif 909 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 910 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 911 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 912 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 913 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 914 ierr = PetscFree(aij);CHKERRQ(ierr); 915 916 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 917 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 918 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 919 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 920 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 921 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 922 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 923 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 924 PetscFunctionReturn(0); 925 } 926 927 #undef __FUNCT__ 928 #define __FUNCT__ "MatView_MPIAIJ_Binary" 929 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 930 { 931 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 932 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 933 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 934 PetscErrorCode ierr; 935 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 936 int fd; 937 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 938 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 939 PetscScalar *column_values; 940 941 PetscFunctionBegin; 942 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 943 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 944 nz = A->nz + B->nz; 945 if (!rank) { 946 header[0] = MAT_FILE_COOKIE; 947 header[1] = mat->rmap->N; 948 header[2] = mat->cmap->N; 949 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 950 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 951 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 952 /* get largest number of rows any processor has */ 953 rlen = mat->rmap->n; 954 
range = mat->rmap->range; 955 for (i=1; i<size; i++) { 956 rlen = PetscMax(rlen,range[i+1] - range[i]); 957 } 958 } else { 959 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 960 rlen = mat->rmap->n; 961 } 962 963 /* load up the local row counts */ 964 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 965 for (i=0; i<mat->rmap->n; i++) { 966 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 967 } 968 969 /* store the row lengths to the file */ 970 if (!rank) { 971 MPI_Status status; 972 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 973 for (i=1; i<size; i++) { 974 rlen = range[i+1] - range[i]; 975 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 976 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 977 } 978 } else { 979 ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 980 } 981 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 982 983 /* load up the local column indices */ 984 nzmax = nz; /* )th processor needs space a largest processor needs */ 985 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 986 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 987 cnt = 0; 988 for (i=0; i<mat->rmap->n; i++) { 989 for (j=B->i[i]; j<B->i[i+1]; j++) { 990 if ( (col = garray[B->j[j]]) > cstart) break; 991 column_indices[cnt++] = col; 992 } 993 for (k=A->i[i]; k<A->i[i+1]; k++) { 994 column_indices[cnt++] = A->j[k] + cstart; 995 } 996 for (; j<B->i[i+1]; j++) { 997 column_indices[cnt++] = garray[B->j[j]]; 998 } 999 } 1000 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1001 1002 /* store the column indices to the file */ 1003 if (!rank) { 1004 MPI_Status status; 1005 ierr = 
PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1006 for (i=1; i<size; i++) { 1007 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1008 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1009 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1010 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1011 } 1012 } else { 1013 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1014 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1015 } 1016 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1017 1018 /* load up the local column values */ 1019 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1020 cnt = 0; 1021 for (i=0; i<mat->rmap->n; i++) { 1022 for (j=B->i[i]; j<B->i[i+1]; j++) { 1023 if ( garray[B->j[j]] > cstart) break; 1024 column_values[cnt++] = B->a[j]; 1025 } 1026 for (k=A->i[i]; k<A->i[i+1]; k++) { 1027 column_values[cnt++] = A->a[k]; 1028 } 1029 for (; j<B->i[i+1]; j++) { 1030 column_values[cnt++] = B->a[j]; 1031 } 1032 } 1033 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1034 1035 /* store the column values to the file */ 1036 if (!rank) { 1037 MPI_Status status; 1038 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1039 for (i=1; i<size; i++) { 1040 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1041 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1042 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1043 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1044 } 1045 } else { 1046 ierr = 
MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1047 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1048 } 1049 ierr = PetscFree(column_values);CHKERRQ(ierr); 1050 PetscFunctionReturn(0); 1051 } 1052 1053 #undef __FUNCT__ 1054 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1055 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1056 { 1057 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1058 PetscErrorCode ierr; 1059 PetscMPIInt rank = aij->rank,size = aij->size; 1060 PetscTruth isdraw,iascii,isbinary; 1061 PetscViewer sviewer; 1062 PetscViewerFormat format; 1063 1064 PetscFunctionBegin; 1065 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1066 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1067 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1068 if (iascii) { 1069 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1070 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1071 MatInfo info; 1072 PetscTruth inodes; 1073 1074 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1075 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1076 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1077 if (!inodes) { 1078 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1079 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1080 } else { 1081 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1082 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1083 } 1084 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1085 ierr = 
PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1086 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1087 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1088 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1089 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1090 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1091 PetscFunctionReturn(0); 1092 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1093 PetscInt inodecount,inodelimit,*inodes; 1094 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1095 if (inodes) { 1096 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1097 } else { 1098 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1099 } 1100 PetscFunctionReturn(0); 1101 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1102 PetscFunctionReturn(0); 1103 } 1104 } else if (isbinary) { 1105 if (size == 1) { 1106 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1107 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1108 } else { 1109 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1110 } 1111 PetscFunctionReturn(0); 1112 } else if (isdraw) { 1113 PetscDraw draw; 1114 PetscTruth isnull; 1115 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1116 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1117 } 1118 1119 if (size == 1) { 1120 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1121 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1122 } else { 1123 /* assemble the entire matrix onto first processor. 
*/ 1124 Mat A; 1125 Mat_SeqAIJ *Aloc; 1126 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1127 MatScalar *a; 1128 1129 if (mat->rmap->N > 1024) { 1130 PetscTruth flg = PETSC_FALSE; 1131 1132 ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1133 if (!flg) { 1134 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1135 } 1136 } 1137 1138 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1139 if (!rank) { 1140 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1141 } else { 1142 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1143 } 1144 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1145 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1146 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1147 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1148 1149 /* copy over the A part */ 1150 Aloc = (Mat_SeqAIJ*)aij->A->data; 1151 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1152 row = mat->rmap->rstart; 1153 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1154 for (i=0; i<m; i++) { 1155 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1156 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1157 } 1158 aj = Aloc->j; 1159 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1160 1161 /* copy over the B part */ 1162 Aloc = (Mat_SeqAIJ*)aij->B->data; 1163 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1164 row = mat->rmap->rstart; 1165 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1166 ct = cols; 1167 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1168 for (i=0; i<m; i++) { 1169 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1170 row++; a += 
ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1171 } 1172 ierr = PetscFree(ct);CHKERRQ(ierr); 1173 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1174 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1175 /* 1176 Everyone has to call to draw the matrix since the graphics waits are 1177 synchronized across all processors that share the PetscDraw object 1178 */ 1179 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1180 if (!rank) { 1181 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1182 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1183 } 1184 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1185 ierr = MatDestroy(A);CHKERRQ(ierr); 1186 } 1187 PetscFunctionReturn(0); 1188 } 1189 1190 #undef __FUNCT__ 1191 #define __FUNCT__ "MatView_MPIAIJ" 1192 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1193 { 1194 PetscErrorCode ierr; 1195 PetscTruth iascii,isdraw,issocket,isbinary; 1196 1197 PetscFunctionBegin; 1198 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1199 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1200 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1201 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1202 if (iascii || isdraw || isbinary || issocket) { 1203 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1204 } else { 1205 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1206 } 1207 PetscFunctionReturn(0); 1208 } 1209 1210 #undef __FUNCT__ 1211 #define __FUNCT__ "MatRelax_MPIAIJ" 1212 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1213 { 1214 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1215 
PetscErrorCode ierr; 1216 Vec bb1 = 0; 1217 PetscTruth hasop; 1218 1219 PetscFunctionBegin; 1220 if (its > 1) {ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);} 1221 1222 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1223 if (flag & SOR_ZERO_INITIAL_GUESS) { 1224 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1225 its--; 1226 } 1227 1228 while (its--) { 1229 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1230 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1231 1232 /* update rhs: bb1 = bb - B*x */ 1233 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1234 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1235 1236 /* local sweep */ 1237 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1238 } 1239 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1240 if (flag & SOR_ZERO_INITIAL_GUESS) { 1241 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1242 its--; 1243 } 1244 while (its--) { 1245 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1246 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1247 1248 /* update rhs: bb1 = bb - B*x */ 1249 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1250 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1251 1252 /* local sweep */ 1253 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1254 } 1255 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1256 if (flag & SOR_ZERO_INITIAL_GUESS) { 1257 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1258 its--; 1259 } 1260 while (its--) { 1261 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1262 ierr = 
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1263 1264 /* update rhs: bb1 = bb - B*x */ 1265 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1266 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1267 1268 /* local sweep */ 1269 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1270 } 1271 } else if (flag & SOR_EISENSTAT) { 1272 Vec xx1; 1273 1274 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1275 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1276 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1277 1278 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1279 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1280 if (!mat->diag) { 1281 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1282 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1283 } 1284 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1285 if (hasop) { 1286 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1287 } else { 1288 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1289 } 1290 ierr = VecWAXPY(bb1,-1.0,bb1,bb);CHKERRQ(ierr); 1291 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1292 1293 /* local sweep */ 1294 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1295 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1296 ierr = VecDestroy(xx1);CHKERRQ(ierr); 1297 } else { 1298 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1299 } 1300 1301 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 1302 PetscFunctionReturn(0); 1303 } 1304 1305 #undef __FUNCT__ 1306 #define __FUNCT__ "MatPermute_MPIAIJ" 1307 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1308 { 
1309 MPI_Comm comm,pcomm; 1310 PetscInt first,local_size,nrows; 1311 const PetscInt *rows; 1312 PetscMPIInt size; 1313 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1314 PetscErrorCode ierr; 1315 1316 PetscFunctionBegin; 1317 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1318 /* make a collective version of 'rowp' */ 1319 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1320 if (pcomm==comm) { 1321 crowp = rowp; 1322 } else { 1323 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1324 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1325 ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr); 1326 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1327 } 1328 /* collect the global row permutation and invert it */ 1329 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1330 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1331 if (pcomm!=comm) { 1332 ierr = ISDestroy(crowp);CHKERRQ(ierr); 1333 } 1334 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1335 /* get the local target indices */ 1336 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1337 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 1338 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1339 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr); 1340 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1341 ierr = ISDestroy(irowp);CHKERRQ(ierr); 1342 /* the column permutation is so much easier; 1343 make a local version of 'colp' and invert it */ 1344 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 1345 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 1346 if (size==1) { 1347 lcolp = colp; 1348 } else { 1349 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 1350 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 1351 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr); 1352 } 1353 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1354 ierr = 
ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1355 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 1356 if (size>1) { 1357 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 1358 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 1359 } 1360 /* now we just get the submatrix */ 1361 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1362 /* clean up */ 1363 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 1364 ierr = ISDestroy(icolp);CHKERRQ(ierr); 1365 PetscFunctionReturn(0); 1366 } 1367 1368 #undef __FUNCT__ 1369 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1370 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1371 { 1372 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1373 Mat A = mat->A,B = mat->B; 1374 PetscErrorCode ierr; 1375 PetscReal isend[5],irecv[5]; 1376 1377 PetscFunctionBegin; 1378 info->block_size = 1.0; 1379 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1380 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1381 isend[3] = info->memory; isend[4] = info->mallocs; 1382 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1383 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1384 isend[3] += info->memory; isend[4] += info->mallocs; 1385 if (flag == MAT_LOCAL) { 1386 info->nz_used = isend[0]; 1387 info->nz_allocated = isend[1]; 1388 info->nz_unneeded = isend[2]; 1389 info->memory = isend[3]; 1390 info->mallocs = isend[4]; 1391 } else if (flag == MAT_GLOBAL_MAX) { 1392 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1393 info->nz_used = irecv[0]; 1394 info->nz_allocated = irecv[1]; 1395 info->nz_unneeded = irecv[2]; 1396 info->memory = irecv[3]; 1397 info->mallocs = irecv[4]; 1398 } else if (flag == MAT_GLOBAL_SUM) { 1399 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1400 info->nz_used = irecv[0]; 1401 info->nz_allocated = 
irecv[1]; 1402 info->nz_unneeded = irecv[2]; 1403 info->memory = irecv[3]; 1404 info->mallocs = irecv[4]; 1405 } 1406 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1407 info->fill_ratio_needed = 0; 1408 info->factor_mallocs = 0; 1409 1410 PetscFunctionReturn(0); 1411 } 1412 1413 #undef __FUNCT__ 1414 #define __FUNCT__ "MatSetOption_MPIAIJ" 1415 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg) 1416 { 1417 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1418 PetscErrorCode ierr; 1419 1420 PetscFunctionBegin; 1421 switch (op) { 1422 case MAT_NEW_NONZERO_LOCATIONS: 1423 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1424 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1425 case MAT_KEEP_NONZERO_PATTERN: 1426 case MAT_NEW_NONZERO_LOCATION_ERR: 1427 case MAT_USE_INODES: 1428 case MAT_IGNORE_ZERO_ENTRIES: 1429 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1430 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1431 break; 1432 case MAT_ROW_ORIENTED: 1433 a->roworiented = flg; 1434 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1435 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1436 break; 1437 case MAT_NEW_DIAGONALS: 1438 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1439 break; 1440 case MAT_IGNORE_OFF_PROC_ENTRIES: 1441 a->donotstash = PETSC_TRUE; 1442 break; 1443 case MAT_SYMMETRIC: 1444 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1445 break; 1446 case MAT_STRUCTURALLY_SYMMETRIC: 1447 case MAT_HERMITIAN: 1448 case MAT_SYMMETRY_ETERNAL: 1449 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1450 break; 1451 default: 1452 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1453 } 1454 PetscFunctionReturn(0); 1455 } 1456 1457 #undef __FUNCT__ 1458 #define __FUNCT__ "MatGetRow_MPIAIJ" 1459 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1460 { 1461 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1462 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1463 PetscErrorCode ierr; 1464 PetscInt 
i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1465 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1466 PetscInt *cmap,*idx_p; 1467 1468 PetscFunctionBegin; 1469 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1470 mat->getrowactive = PETSC_TRUE; 1471 1472 if (!mat->rowvalues && (idx || v)) { 1473 /* 1474 allocate enough space to hold information from the longest row. 1475 */ 1476 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1477 PetscInt max = 1,tmp; 1478 for (i=0; i<matin->rmap->n; i++) { 1479 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1480 if (max < tmp) { max = tmp; } 1481 } 1482 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1483 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1484 } 1485 1486 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1487 lrow = row - rstart; 1488 1489 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1490 if (!v) {pvA = 0; pvB = 0;} 1491 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1492 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1493 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1494 nztot = nzA + nzB; 1495 1496 cmap = mat->garray; 1497 if (v || idx) { 1498 if (nztot) { 1499 /* Sort by increasing column numbers, assuming A and B already sorted */ 1500 PetscInt imark = -1; 1501 if (v) { 1502 *v = v_p = mat->rowvalues; 1503 for (i=0; i<nzB; i++) { 1504 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1505 else break; 1506 } 1507 imark = i; 1508 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1509 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1510 } 1511 if (idx) { 1512 *idx = idx_p = mat->rowindices; 1513 if (imark > -1) { 1514 for (i=0; i<imark; i++) { 1515 idx_p[i] = cmap[cworkB[i]]; 1516 } 1517 } else { 1518 for (i=0; i<nzB; i++) { 1519 if (cmap[cworkB[i]] < cstart) 
idx_p[i] = cmap[cworkB[i]]; 1520 else break; 1521 } 1522 imark = i; 1523 } 1524 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1525 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1526 } 1527 } else { 1528 if (idx) *idx = 0; 1529 if (v) *v = 0; 1530 } 1531 } 1532 *nz = nztot; 1533 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1534 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1535 PetscFunctionReturn(0); 1536 } 1537 1538 #undef __FUNCT__ 1539 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1540 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1541 { 1542 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1543 1544 PetscFunctionBegin; 1545 if (!aij->getrowactive) { 1546 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1547 } 1548 aij->getrowactive = PETSC_FALSE; 1549 PetscFunctionReturn(0); 1550 } 1551 1552 #undef __FUNCT__ 1553 #define __FUNCT__ "MatNorm_MPIAIJ" 1554 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1555 { 1556 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1557 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1558 PetscErrorCode ierr; 1559 PetscInt i,j,cstart = mat->cmap->rstart; 1560 PetscReal sum = 0.0; 1561 MatScalar *v; 1562 1563 PetscFunctionBegin; 1564 if (aij->size == 1) { 1565 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1566 } else { 1567 if (type == NORM_FROBENIUS) { 1568 v = amat->a; 1569 for (i=0; i<amat->nz; i++) { 1570 #if defined(PETSC_USE_COMPLEX) 1571 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1572 #else 1573 sum += (*v)*(*v); v++; 1574 #endif 1575 } 1576 v = bmat->a; 1577 for (i=0; i<bmat->nz; i++) { 1578 #if defined(PETSC_USE_COMPLEX) 1579 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1580 #else 1581 sum += (*v)*(*v); v++; 1582 #endif 1583 } 1584 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1585 
*norm = sqrt(*norm); 1586 } else if (type == NORM_1) { /* max column norm */ 1587 PetscReal *tmp,*tmp2; 1588 PetscInt *jj,*garray = aij->garray; 1589 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1590 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1591 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1592 *norm = 0.0; 1593 v = amat->a; jj = amat->j; 1594 for (j=0; j<amat->nz; j++) { 1595 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1596 } 1597 v = bmat->a; jj = bmat->j; 1598 for (j=0; j<bmat->nz; j++) { 1599 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1600 } 1601 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1602 for (j=0; j<mat->cmap->N; j++) { 1603 if (tmp2[j] > *norm) *norm = tmp2[j]; 1604 } 1605 ierr = PetscFree(tmp);CHKERRQ(ierr); 1606 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1607 } else if (type == NORM_INFINITY) { /* max row norm */ 1608 PetscReal ntemp = 0.0; 1609 for (j=0; j<aij->A->rmap->n; j++) { 1610 v = amat->a + amat->i[j]; 1611 sum = 0.0; 1612 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1613 sum += PetscAbsScalar(*v); v++; 1614 } 1615 v = bmat->a + bmat->i[j]; 1616 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1617 sum += PetscAbsScalar(*v); v++; 1618 } 1619 if (sum > ntemp) ntemp = sum; 1620 } 1621 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1622 } else { 1623 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1624 } 1625 } 1626 PetscFunctionReturn(0); 1627 } 1628 1629 #undef __FUNCT__ 1630 #define __FUNCT__ "MatTranspose_MPIAIJ" 1631 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1632 { 1633 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1634 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1635 PetscErrorCode ierr; 1636 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1637 PetscInt 
cstart=A->cmap->rstart,ncol; 1638 Mat B; 1639 MatScalar *array; 1640 1641 PetscFunctionBegin; 1642 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1643 1644 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1645 ai = Aloc->i; aj = Aloc->j; 1646 bi = Bloc->i; bj = Bloc->j; 1647 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1648 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1649 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1650 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1651 for (i=0; i<ai[ma]; i++){ 1652 d_nnz[aj[i]] ++; 1653 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1654 } 1655 1656 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1657 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1658 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1659 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1660 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1661 } else { 1662 B = *matout; 1663 } 1664 1665 /* copy over the A part */ 1666 array = Aloc->a; 1667 row = A->rmap->rstart; 1668 for (i=0; i<ma; i++) { 1669 ncol = ai[i+1]-ai[i]; 1670 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1671 row++; array += ncol; aj += ncol; 1672 } 1673 aj = Aloc->j; 1674 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1675 1676 /* copy over the B part */ 1677 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1678 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1679 array = Bloc->a; 1680 row = A->rmap->rstart; 1681 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1682 cols_tmp = cols; 1683 for (i=0; i<mb; i++) { 1684 ncol = bi[i+1]-bi[i]; 1685 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1686 row++; array += ncol; cols_tmp += ncol; 1687 } 
ierr = PetscFree(cols);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  /* MAT_INITIAL_MATRIX (or out-of-place reuse): hand the new matrix back;
     in-place reuse: replace A's contents with B's via a header copy */
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
/*
   Scales mat by diagonal matrices: mat = diag(ll)*mat*diag(rr).
   Either ll or rr may be PETSC_NULL to skip that side.
   The right scaling of the off-diagonal block B needs the ghost values of rr,
   so the scatter of rr into aij->lvec is started first and only completed
   after the work that does not depend on it (left scaling and the diagonal
   block) has been done.
*/
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;  /* diagonal and off-diagonal blocks */
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* left scaling is purely local: rows of B live on this process */
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetBlockSize_MPIAIJ"
/* Sets the block size on both sequential blocks of the parallel matrix. */
PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
  ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
/* Clears the factored state; only the diagonal block carries factorization data here. */
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
/*
   Compares two MPIAIJ matrices block-by-block: the local diagonal blocks
   and the local off-diagonal blocks must each be equal on every process.
   The per-process results are combined with a logical-AND reduction so all
   ranks return the same answer in *flag.
*/
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    /* only bother with the off-diagonal blocks when the diagonal ones match */
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  /* NOTE(review): PetscTruth reduced as MPI_INT — assumes PetscTruth is int-sized (enum); confirm */
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIAIJ"
/*
   Copies A into B. Fast path: when the nonzero patterns are identical and
   both matrices use this same copy implementation, copy the two sequential
   blocks directly; otherwise fall back to the generic MatCopy_Basic().
*/
PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts.
If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
/* Default preallocation hook: applies the library default nonzeros-per-row estimates. */
PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
/*
   Computes Y = a*X + Y.
   - SAME_NONZERO_PATTERN: the aij value arrays line up entry-for-entry, so a
     single BLAS axpy on each block's value array suffices (no communication).
   - SUBSET_NONZERO_PATTERN: X's nonzeros are a subset of Y's; a cached
     x-to-y index map (y->xtoy) scatters X's off-diagonal values into Y.
   - otherwise: generic MatAXPY_Basic().
*/
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x = (Mat_SeqAIJ *)xx->A->data;
    y = (Mat_SeqAIJ *)yy->A->data;
    bnz = PetscBLASIntCast(x->nz);
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    bnz = PetscBLASIntCast(x->nz);
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
  } else if (str == SUBSET_NONZERO_PATTERN) {
    ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);

    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    /* discard a cached map that was built against a different X */
    if (y->xtoy && y->XtoY != xx->B) {
      ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
      ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
    }
    if (!y->xtoy) { /* get xtoy */
      ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
      y->XtoY = xx->B;
      ierr =
PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1849 } 1850 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1851 } else { 1852 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1853 } 1854 PetscFunctionReturn(0); 1855 } 1856 1857 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1858 1859 #undef __FUNCT__ 1860 #define __FUNCT__ "MatConjugate_MPIAIJ" 1861 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1862 { 1863 #if defined(PETSC_USE_COMPLEX) 1864 PetscErrorCode ierr; 1865 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1866 1867 PetscFunctionBegin; 1868 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1869 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1870 #else 1871 PetscFunctionBegin; 1872 #endif 1873 PetscFunctionReturn(0); 1874 } 1875 1876 #undef __FUNCT__ 1877 #define __FUNCT__ "MatRealPart_MPIAIJ" 1878 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1879 { 1880 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1881 PetscErrorCode ierr; 1882 1883 PetscFunctionBegin; 1884 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1885 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1886 PetscFunctionReturn(0); 1887 } 1888 1889 #undef __FUNCT__ 1890 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1891 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1892 { 1893 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1894 PetscErrorCode ierr; 1895 1896 PetscFunctionBegin; 1897 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1898 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1899 PetscFunctionReturn(0); 1900 } 1901 1902 #ifdef PETSC_HAVE_PBGL 1903 1904 #include <boost/parallel/mpi/bsp_process_group.hpp> 1905 #include <boost/graph/distributed/ilu_default_graph.hpp> 1906 #include <boost/graph/distributed/ilu_0_block.hpp> 1907 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1908 #include <boost/graph/distributed/petsc/interface.hpp> 1909 #include <boost/multi_array.hpp> 1910 #include <boost/parallel/distributed_property_map->hpp> 1911 1912 #undef __FUNCT__ 1913 #define __FUNCT__ 
"MatILUFactorSymbolic_MPIAIJ"
/*
  This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>

  Only supports levels = 0 and identity row/column permutations. The PBGL
  level-graph object is heap-allocated and attached to the factor matrix via
  a PetscContainer under the key "graph" for use by MatSolve_MPIAIJ().
*/
PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
{
  namespace petsc = boost::distributed::petsc;

  namespace graph_dist = boost::graph::distributed;
  using boost::graph::distributed::ilu_default::process_group_type;
  using boost::graph::ilu_permuted;

  PetscTruth     row_identity, col_identity;
  PetscContainer c;
  PetscInt       m, n, M, N;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
  ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
  ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
  if (!row_identity || !col_identity) {
    SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
  }

  /* build the distributed graph representation of A and run the ILU */
  process_group_type pg;
  typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
  lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
  lgraph_type& level_graph = *lgraph_p;
  graph_dist::ilu_default::graph_type& graph(level_graph.graph);

  petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
  ilu_permuted(level_graph);

  /* put together the new matrix */
  ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
  ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
  ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
  ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* FIX: error code was previously ignored here */
  ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
  ierr =
PetscContainerSetPointer(c, lgraph_p); 1958 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 1959 PetscFunctionReturn(0); 1960 } 1961 1962 #undef __FUNCT__ 1963 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1964 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 1965 { 1966 PetscFunctionBegin; 1967 PetscFunctionReturn(0); 1968 } 1969 1970 #undef __FUNCT__ 1971 #define __FUNCT__ "MatSolve_MPIAIJ" 1972 /* 1973 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1974 */ 1975 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1976 { 1977 namespace graph_dist = boost::graph::distributed; 1978 1979 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1980 lgraph_type* lgraph_p; 1981 PetscContainer c; 1982 PetscErrorCode ierr; 1983 1984 PetscFunctionBegin; 1985 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1986 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1987 ierr = VecCopy(b, x);CHKERRQ(ierr); 1988 1989 PetscScalar* array_x; 1990 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1991 PetscInt sx; 1992 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 1993 1994 PetscScalar* array_b; 1995 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 1996 PetscInt sb; 1997 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 1998 1999 lgraph_type& level_graph = *lgraph_p; 2000 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2001 2002 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2003 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2004 ref_x(array_x, boost::extents[num_vertices(graph)]); 2005 2006 typedef boost::iterator_property_map<array_ref_type::iterator, 2007 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2008 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2009 vector_x(ref_x.begin(), get(boost::vertex_index, 
graph)); 2010 2011 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2012 2013 PetscFunctionReturn(0); 2014 } 2015 #endif 2016 2017 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2018 PetscInt nzlocal,nsends,nrecvs; 2019 PetscMPIInt *send_rank; 2020 PetscInt *sbuf_nz,*sbuf_j,**rbuf_j; 2021 PetscScalar *sbuf_a,**rbuf_a; 2022 PetscErrorCode (*MatDestroy)(Mat); 2023 } Mat_Redundant; 2024 2025 #undef __FUNCT__ 2026 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2027 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2028 { 2029 PetscErrorCode ierr; 2030 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2031 PetscInt i; 2032 2033 PetscFunctionBegin; 2034 ierr = PetscFree(redund->send_rank);CHKERRQ(ierr); 2035 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2036 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2037 for (i=0; i<redund->nrecvs; i++){ 2038 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2039 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2040 } 2041 ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2042 ierr = PetscFree(redund);CHKERRQ(ierr); 2043 PetscFunctionReturn(0); 2044 } 2045 2046 #undef __FUNCT__ 2047 #define __FUNCT__ "MatDestroy_MatRedundant" 2048 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2049 { 2050 PetscErrorCode ierr; 2051 PetscContainer container; 2052 Mat_Redundant *redund=PETSC_NULL; 2053 2054 PetscFunctionBegin; 2055 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2056 if (container) { 2057 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2058 } else { 2059 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2060 } 2061 A->ops->destroy = redund->MatDestroy; 2062 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2063 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2064 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 2065 PetscFunctionReturn(0); 2066 } 2067 2068 
#undef __FUNCT__
#define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
/*
   Gathers the whole parallel matrix redundantly onto each subcommunicator:
   every process sends its local rows (CSR row pointers + global column
   indices + values) to the processes that share its subcomm position, and
   assembles the received rows into a matrix C on subcomm.

   reuse == MAT_INITIAL_MATRIX: builds the communication pattern, allocates
   all buffers, creates C, and caches everything in a Mat_Redundant container
   attached to C so later calls can skip the setup.
   reuse == MAT_REUSE_MATRIX: validates *matredundant and replays only the
   value (mat->a) exchange using the cached pattern.
*/
PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
{
  PetscMPIInt    rank,size;
  MPI_Comm       comm=((PetscObject)mat)->comm;
  PetscErrorCode ierr;
  PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
  PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
  PetscInt       *rowrange=mat->rmap->range;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            A=aij->A,B=aij->B,C=*matredundant;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
  PetscScalar    *sbuf_a;
  PetscInt       nzlocal=a->nz+b->nz;
  PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
  PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
  PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
  MatScalar      *aworkA,*aworkB;
  PetscScalar    *vals;
  PetscMPIInt    tag1,tag2,tag3,imdex;
  MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
                 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
  MPI_Status     recv_status,*send_status;
  PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
  PetscInt       **rbuf_j=PETSC_NULL;
  PetscScalar    **rbuf_a=PETSC_NULL;
  Mat_Redundant  *redund=PETSC_NULL;
  PetscContainer container;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (reuse == MAT_REUSE_MATRIX) {
    /* sanity-check the recycled matrix, then pull the cached communication
       pattern and buffers out of its Mat_Redundant container */
    ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
    ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
    ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
    if (container) {
      ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); /* NOTE(review): typo for "exist" in message */
    }
    if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");

    nsends    = redund->nsends;
    nrecvs    = redund->nrecvs;
    send_rank = redund->send_rank; recv_rank = send_rank + size;
    sbuf_nz   = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends;
    sbuf_j    = redund->sbuf_j;
    sbuf_a    = redund->sbuf_a;
    rbuf_j    = redund->rbuf_j;
    rbuf_a    = redund->rbuf_a;
  }

  if (reuse == MAT_INITIAL_MATRIX){
    PetscMPIInt subrank,subsize;
    PetscInt    nleftover,np_subcomm;
    /* get the destination processors' id send_rank, nsends and nrecvs */
    ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
    /* NOTE(review): return code of this PetscMalloc is not CHKERRQ-checked — confirm intended */
    ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);
    recv_rank = send_rank + size;
    np_subcomm = size/nsubcomm;
    nleftover  = size - nsubcomm*np_subcomm;
    nsends = 0; nrecvs = 0;
    for (i=0; i<size; i++){ /* i=rank*/
      if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
        send_rank[nsends] = i; nsends++;
        recv_rank[nrecvs++] = i;
      }
    }
    if (rank >= size - nleftover){/* this proc is a leftover processor */
      i = size-nleftover-1;
      j = 0;
      while (j < nsubcomm - nleftover){
        send_rank[nsends++] = i;
        i--; j++;
      }
    }

    if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
      for (i=0; i<nleftover; i++){
        recv_rank[nrecvs++] = size-nleftover+i;
      }
    }

    /* allocate sbuf_j, sbuf_a */
    i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
    ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
    ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
  } /* endof if (reuse == MAT_INITIAL_MATRIX) */

  /* copy mat's local entries into the buffers: sbuf_j holds row pointers
     followed by global column indices (sorted: B-cols < cstart, A-cols,
     B-cols >= cend), sbuf_a holds the matching values */
  if (reuse == MAT_INITIAL_MATRIX){
    rownz_max = 0;
    rptr = sbuf_j;
    cols = sbuf_j + rend-rstart + 1;
    vals = sbuf_a;
    rptr[0] = 0;
    for (i=0; i<rend-rstart; i++){
      row = i + rstart;
      nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
      ncols = nzA + nzB;
      cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
      aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
      /* load the column indices for this row into cols */
      lwrite = 0;
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) < cstart){
          vals[lwrite] = aworkB[l];
          cols[lwrite++] = ctmp;
        }
      }
      for (l=0; l<nzA; l++){
        vals[lwrite] = aworkA[l];
        cols[lwrite++] = cstart + cworkA[l];
      }
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) >= cend){
          vals[lwrite] = aworkB[l];
          cols[lwrite++] = ctmp;
        }
      }
      vals += ncols;
      cols += ncols;
      rptr[i+1] = rptr[i] + ncols;
      if (rownz_max < ncols) rownz_max = ncols;
    }
    /* NOTE(review): the message prints rptr[rend-rstart+1] while the test uses rptr[rend-rstart] — likely off-by-one in the message */
    if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
  } else { /* only copy matrix values into sbuf_a */
    rptr = sbuf_j;
    vals = sbuf_a;
    rptr[0] = 0;
    for (i=0; i<rend-rstart; i++){
      row = i + rstart;
      nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
      ncols = nzA + nzB;
      cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
      aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
      lwrite = 0;
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
      }
      for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
      }
      vals += ncols;
      rptr[i+1] = rptr[i] + ncols;
    }
  } /* endof if (reuse == MAT_INITIAL_MATRIX) */

  /* send nzlocal to others, and recv other's nzlocal */
  /*--------------------------------------------------*/
  if (reuse == MAT_INITIAL_MATRIX){
    /* one flat array carved into the three send and three recv request groups */
    ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
    s_waits2 = s_waits3 + nsends;
    s_waits1 = s_waits2 + nsends;
    r_waits1 = s_waits1 + nsends;
    r_waits2 = r_waits1 + nrecvs;
    r_waits3 = r_waits2 + nrecvs;
  } else {
    ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
    r_waits3 = s_waits3 + nsends;
  }

  ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX){
    /* get new tags to keep the communication clean */
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
    ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
    rbuf_nz = sbuf_nz + nsends;

    /* post receives of other's nzlocal */
    for (i=0; i<nrecvs; i++){
      ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
    }
    /* send nzlocal to others */
    for (i=0; i<nsends; i++){
      sbuf_nz[i] = nzlocal;
      ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
    }
    /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
    count = nrecvs;
    while (count) {
      ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
      recv_rank[imdex] = recv_status.MPI_SOURCE;
      /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
      ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);

      i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
      rbuf_nz[imdex] += i + 2;
      ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
      ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
      count--;
    }
    /* wait on sends of nzlocal */
    if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
    /* send mat->i,j to others, and recv from other's */
    /*------------------------------------------------*/
    for (i=0; i<nsends; i++){
      j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
      ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
    }
    /* wait on receives of mat->i,j */
    /*------------------------------*/
    count = nrecvs;
    while (count) {
      ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
      if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
      count--;
    }
    /* wait on sends of mat->i,j */
    /*---------------------------*/
    if (nsends) {
      ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
    }
  } /* endof if (reuse == MAT_INITIAL_MATRIX) */

  /* post receives, send and receive mat->a */
  /*----------------------------------------*/
  for (imdex=0; imdex<nrecvs; imdex++) {
    ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
  }
  for (i=0; i<nsends; i++){
    ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
  }
  count = nrecvs;
  while (count) {
    ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
    if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
    count--;
  }
  if (nsends) {
    ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
  }

  ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);

  /* create redundant matrix */
  /*-------------------------*/
  if (reuse == MAT_INITIAL_MATRIX){
    /* compute rownz_max for preallocation */
    for (imdex=0; imdex<nrecvs; imdex++){
      j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
      rptr = rbuf_j[imdex];
      for (i=0; i<j; i++){
        ncols = rptr[i+1] - rptr[i];
        if (rownz_max < ncols) rownz_max = ncols;
      }
    }

    ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
    ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
    ierr = MatSetFromOptions(C);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
  } else {
    C = *matredundant;
  }

  /* insert local matrix entries */
  rptr = sbuf_j;
  cols = sbuf_j + rend-rstart + 1;
  vals = sbuf_a;
  for (i=0; i<rend-rstart; i++){
    row = i + rstart;
    ncols = rptr[i+1] - rptr[i];
    ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
    vals += ncols;
    cols += ncols;
  }
  /* insert received matrix entries */
  for (imdex=0; imdex<nrecvs; imdex++){
    rstart = rowrange[recv_rank[imdex]];
    rend   = rowrange[recv_rank[imdex]+1];
    rptr   = rbuf_j[imdex];
    cols   = rbuf_j[imdex] + rend-rstart + 1;
    vals   = rbuf_a[imdex];
    for (i=0; i<rend-rstart; i++){
      row = i + rstart;
      ncols = rptr[i+1] - rptr[i];
      ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      vals += ncols;
      cols += ncols;
    }
  }
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
  if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
  if (reuse == MAT_INITIAL_MATRIX){
    PetscContainer container;
    *matredundant = C;
    /* create a supporting struct and attach it to C for reuse */
    ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
    ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
    ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
    ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);

    redund->nzlocal = nzlocal;
    redund->nsends  = nsends;
    redund->nrecvs  = nrecvs;
    redund->send_rank = send_rank;
    redund->sbuf_nz = sbuf_nz;
    redund->sbuf_j  = sbuf_j;
    redund->sbuf_a  = sbuf_a;
    redund->rbuf_j  = rbuf_j;
    redund->rbuf_a  = rbuf_a;

    /* hook our destroy routine so the cached buffers are cleaned up with C */
    redund->MatDestroy = C->ops->destroy;
    C->ops->destroy = MatDestroy_MatRedundant;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
/*
   Row-wise maximum absolute value: computed per block, then merged.
   idx (optional, may be PETSC_NULL) receives the global column index of the
   winning entry; diagonal-block indices are shifted by cmap->rstart and
   off-diagonal indices mapped through garray.
*/
PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr =
PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2411 } 2412 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2413 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2414 2415 for (i=0; i<A->rmap->n; i++){ 2416 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2417 va[i] = vb[i]; 2418 if (idx) idx[i] = a->garray[idxb[i]]; 2419 } 2420 } 2421 2422 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2423 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2424 if (idxb) { 2425 ierr = PetscFree(idxb);CHKERRQ(ierr); 2426 } 2427 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2428 PetscFunctionReturn(0); 2429 } 2430 2431 #undef __FUNCT__ 2432 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2433 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2434 { 2435 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2436 PetscErrorCode ierr; 2437 PetscInt i,*idxb = 0; 2438 PetscScalar *va,*vb; 2439 Vec vtmp; 2440 2441 PetscFunctionBegin; 2442 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2443 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2444 if (idx) { 2445 for (i=0; i<A->cmap->n; i++) { 2446 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2447 } 2448 } 2449 2450 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2451 if (idx) { 2452 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2453 } 2454 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2455 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2456 2457 for (i=0; i<A->rmap->n; i++){ 2458 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2459 va[i] = vb[i]; 2460 if (idx) idx[i] = a->garray[idxb[i]]; 2461 } 2462 } 2463 2464 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2465 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2466 if (idxb) { 2467 ierr = PetscFree(idxb);CHKERRQ(ierr); 2468 } 2469 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2470 PetscFunctionReturn(0); 2471 } 2472 2473 #undef __FUNCT__ 2474 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2475 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, 
PetscInt idx[]) 2476 { 2477 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2478 PetscInt n = A->rmap->n; 2479 PetscInt cstart = A->cmap->rstart; 2480 PetscInt *cmap = mat->garray; 2481 PetscInt *diagIdx, *offdiagIdx; 2482 Vec diagV, offdiagV; 2483 PetscScalar *a, *diagA, *offdiagA; 2484 PetscInt r; 2485 PetscErrorCode ierr; 2486 2487 PetscFunctionBegin; 2488 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2489 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2490 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2491 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2492 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2493 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2494 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2495 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2496 for(r = 0; r < n; ++r) { 2497 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2498 a[r] = diagA[r]; 2499 idx[r] = cstart + diagIdx[r]; 2500 } else { 2501 a[r] = offdiagA[r]; 2502 idx[r] = cmap[offdiagIdx[r]]; 2503 } 2504 } 2505 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2506 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2507 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2508 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2509 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2510 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2511 PetscFunctionReturn(0); 2512 } 2513 2514 #undef __FUNCT__ 2515 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2516 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2517 { 2518 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2519 PetscInt n = A->rmap->n; 2520 PetscInt cstart = A->cmap->rstart; 2521 PetscInt *cmap = mat->garray; 2522 PetscInt *diagIdx, *offdiagIdx; 2523 Vec diagV, offdiagV; 2524 PetscScalar *a, *diagA, *offdiagA; 2525 PetscInt r; 2526 PetscErrorCode ierr; 2527 2528 PetscFunctionBegin; 2529 ierr = 
PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2530 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2531 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2532 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2533 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2534 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2535 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2536 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2537 for(r = 0; r < n; ++r) { 2538 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2539 a[r] = diagA[r]; 2540 idx[r] = cstart + diagIdx[r]; 2541 } else { 2542 a[r] = offdiagA[r]; 2543 idx[r] = cmap[offdiagIdx[r]]; 2544 } 2545 } 2546 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2547 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2548 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2549 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2550 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2551 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2552 PetscFunctionReturn(0); 2553 } 2554 2555 #undef __FUNCT__ 2556 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2557 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[]) 2558 { 2559 PetscErrorCode ierr; 2560 2561 PetscFunctionBegin; 2562 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 2563 PetscFunctionReturn(0); 2564 } 2565 2566 /* -------------------------------------------------------------------*/ 2567 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2568 MatGetRow_MPIAIJ, 2569 MatRestoreRow_MPIAIJ, 2570 MatMult_MPIAIJ, 2571 /* 4*/ MatMultAdd_MPIAIJ, 2572 MatMultTranspose_MPIAIJ, 2573 MatMultTransposeAdd_MPIAIJ, 2574 #ifdef PETSC_HAVE_PBGL 2575 MatSolve_MPIAIJ, 2576 #else 2577 0, 2578 #endif 2579 0, 2580 0, 2581 /*10*/ 0, 2582 0, 2583 0, 2584 MatRelax_MPIAIJ, 2585 MatTranspose_MPIAIJ, 2586 /*15*/ 
MatGetInfo_MPIAIJ, 2587 MatEqual_MPIAIJ, 2588 MatGetDiagonal_MPIAIJ, 2589 MatDiagonalScale_MPIAIJ, 2590 MatNorm_MPIAIJ, 2591 /*20*/ MatAssemblyBegin_MPIAIJ, 2592 MatAssemblyEnd_MPIAIJ, 2593 MatSetOption_MPIAIJ, 2594 MatZeroEntries_MPIAIJ, 2595 /*24*/ MatZeroRows_MPIAIJ, 2596 0, 2597 #ifdef PETSC_HAVE_PBGL 2598 0, 2599 #else 2600 0, 2601 #endif 2602 0, 2603 0, 2604 /*29*/ MatSetUpPreallocation_MPIAIJ, 2605 #ifdef PETSC_HAVE_PBGL 2606 0, 2607 #else 2608 0, 2609 #endif 2610 0, 2611 0, 2612 0, 2613 /*34*/ MatDuplicate_MPIAIJ, 2614 0, 2615 0, 2616 0, 2617 0, 2618 /*39*/ MatAXPY_MPIAIJ, 2619 MatGetSubMatrices_MPIAIJ, 2620 MatIncreaseOverlap_MPIAIJ, 2621 MatGetValues_MPIAIJ, 2622 MatCopy_MPIAIJ, 2623 /*44*/ MatGetRowMax_MPIAIJ, 2624 MatScale_MPIAIJ, 2625 0, 2626 0, 2627 0, 2628 /*49*/ MatSetBlockSize_MPIAIJ, 2629 0, 2630 0, 2631 0, 2632 0, 2633 /*54*/ MatFDColoringCreate_MPIAIJ, 2634 0, 2635 MatSetUnfactored_MPIAIJ, 2636 MatPermute_MPIAIJ, 2637 0, 2638 /*59*/ MatGetSubMatrix_MPIAIJ, 2639 MatDestroy_MPIAIJ, 2640 MatView_MPIAIJ, 2641 0, 2642 0, 2643 /*64*/ 0, 2644 0, 2645 0, 2646 0, 2647 0, 2648 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2649 MatGetRowMinAbs_MPIAIJ, 2650 0, 2651 MatSetColoring_MPIAIJ, 2652 #if defined(PETSC_HAVE_ADIC) 2653 MatSetValuesAdic_MPIAIJ, 2654 #else 2655 0, 2656 #endif 2657 MatSetValuesAdifor_MPIAIJ, 2658 /*75*/ 0, 2659 0, 2660 0, 2661 0, 2662 0, 2663 /*80*/ 0, 2664 0, 2665 0, 2666 /*83*/ MatLoad_MPIAIJ, 2667 0, 2668 0, 2669 0, 2670 0, 2671 0, 2672 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2673 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2674 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2675 MatPtAP_Basic, 2676 MatPtAPSymbolic_MPIAIJ, 2677 /*94*/ MatPtAPNumeric_MPIAIJ, 2678 0, 2679 0, 2680 0, 2681 0, 2682 /*99*/ 0, 2683 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2684 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2685 MatConjugate_MPIAIJ, 2686 0, 2687 /*104*/MatSetValuesRow_MPIAIJ, 2688 MatRealPart_MPIAIJ, 2689 MatImaginaryPart_MPIAIJ, 2690 0, 2691 0, 2692 /*109*/0, 2693 MatGetRedundantMatrix_MPIAIJ, 2694 
MatGetRowMin_MPIAIJ, 2695 0, 2696 0, 2697 /*114*/MatGetSeqNonzerostructure_MPIAIJ, 2698 0, 2699 0, 2700 0, 2701 0, 2702 0 2703 }; 2704 2705 /* ----------------------------------------------------------------------------------------*/ 2706 2707 EXTERN_C_BEGIN 2708 #undef __FUNCT__ 2709 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2710 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2711 { 2712 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2713 PetscErrorCode ierr; 2714 2715 PetscFunctionBegin; 2716 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2717 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2718 PetscFunctionReturn(0); 2719 } 2720 EXTERN_C_END 2721 2722 EXTERN_C_BEGIN 2723 #undef __FUNCT__ 2724 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2725 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2726 { 2727 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2728 PetscErrorCode ierr; 2729 2730 PetscFunctionBegin; 2731 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2732 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2733 PetscFunctionReturn(0); 2734 } 2735 EXTERN_C_END 2736 2737 #include "petscpc.h" 2738 EXTERN_C_BEGIN 2739 #undef __FUNCT__ 2740 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2741 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2742 { 2743 Mat_MPIAIJ *b; 2744 PetscErrorCode ierr; 2745 PetscInt i; 2746 2747 PetscFunctionBegin; 2748 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2749 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2750 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2751 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2752 2753 ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr); 2754 ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr); 2755 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 2756 
ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 2757 if (d_nnz) { 2758 for (i=0; i<B->rmap->n; i++) { 2759 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2760 } 2761 } 2762 if (o_nnz) { 2763 for (i=0; i<B->rmap->n; i++) { 2764 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2765 } 2766 } 2767 b = (Mat_MPIAIJ*)B->data; 2768 2769 if (!B->preallocated) { 2770 /* Explicitly create 2 MATSEQAIJ matrices. */ 2771 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2772 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2773 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2774 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2775 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2776 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2777 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2778 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2779 } 2780 2781 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2782 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2783 B->preallocated = PETSC_TRUE; 2784 PetscFunctionReturn(0); 2785 } 2786 EXTERN_C_END 2787 2788 #undef __FUNCT__ 2789 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2790 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2791 { 2792 Mat mat; 2793 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2794 PetscErrorCode ierr; 2795 2796 PetscFunctionBegin; 2797 *newmat = 0; 2798 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2799 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2800 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2801 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2802 a = (Mat_MPIAIJ*)mat->data; 2803 2804 
mat->factor = matin->factor; 2805 mat->rmap->bs = matin->rmap->bs; 2806 mat->assembled = PETSC_TRUE; 2807 mat->insertmode = NOT_SET_VALUES; 2808 mat->preallocated = PETSC_TRUE; 2809 2810 a->size = oldmat->size; 2811 a->rank = oldmat->rank; 2812 a->donotstash = oldmat->donotstash; 2813 a->roworiented = oldmat->roworiented; 2814 a->rowindices = 0; 2815 a->rowvalues = 0; 2816 a->getrowactive = PETSC_FALSE; 2817 2818 ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr); 2819 ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr); 2820 2821 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr); 2822 if (oldmat->colmap) { 2823 #if defined (PETSC_USE_CTABLE) 2824 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2825 #else 2826 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2827 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2828 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2829 #endif 2830 } else a->colmap = 0; 2831 if (oldmat->garray) { 2832 PetscInt len; 2833 len = oldmat->B->cmap->n; 2834 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2835 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2836 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2837 } else a->garray = 0; 2838 2839 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2840 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2841 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2842 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2843 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2844 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2845 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2846 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 
2847 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2848 *newmat = mat; 2849 PetscFunctionReturn(0); 2850 } 2851 2852 #include "petscsys.h" 2853 2854 #undef __FUNCT__ 2855 #define __FUNCT__ "MatLoad_MPIAIJ" 2856 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 2857 { 2858 Mat A; 2859 PetscScalar *vals,*svals; 2860 MPI_Comm comm = ((PetscObject)viewer)->comm; 2861 MPI_Status status; 2862 PetscErrorCode ierr; 2863 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz; 2864 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2865 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2866 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2867 PetscInt cend,cstart,n,*rowners; 2868 int fd; 2869 2870 PetscFunctionBegin; 2871 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2872 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2873 if (!rank) { 2874 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2875 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2876 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2877 } 2878 2879 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2880 M = header[1]; N = header[2]; 2881 /* determine ownership of all rows */ 2882 m = M/size + ((M % size) > rank); 2883 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2884 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2885 2886 /* First process needs enough room for process with most rows */ 2887 if (!rank) { 2888 mmax = rowners[1]; 2889 for (i=2; i<size; i++) { 2890 mmax = PetscMax(mmax,rowners[i]); 2891 } 2892 } else mmax = m; 2893 2894 rowners[0] = 0; 2895 for (i=2; i<=size; i++) { 2896 rowners[i] += rowners[i-1]; 2897 } 2898 rstart = rowners[rank]; 2899 rend = rowners[rank+1]; 2900 2901 /* distribute row lengths to all processors */ 
2902 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2903 if (!rank) { 2904 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2905 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2906 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2907 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2908 for (j=0; j<m; j++) { 2909 procsnz[0] += ourlens[j]; 2910 } 2911 for (i=1; i<size; i++) { 2912 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2913 /* calculate the number of nonzeros on each processor */ 2914 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2915 procsnz[i] += rowlengths[j]; 2916 } 2917 mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]); 2918 ierr = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2919 } 2920 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2921 } else { 2922 mpicnt = PetscMPIIntCast(m);CHKERRQ(ierr); 2923 ierr = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2924 } 2925 2926 if (!rank) { 2927 /* determine max buffer needed and allocate it */ 2928 maxnz = 0; 2929 for (i=0; i<size; i++) { 2930 maxnz = PetscMax(maxnz,procsnz[i]); 2931 } 2932 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2933 2934 /* read in my part of the matrix column indices */ 2935 nz = procsnz[0]; 2936 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2937 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2938 2939 /* read in every one elses and ship off */ 2940 for (i=1; i<size; i++) { 2941 nz = procsnz[i]; 2942 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2943 mpicnt = PetscMPIIntCast(nz); 2944 ierr = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2945 } 2946 ierr = PetscFree(cols);CHKERRQ(ierr); 2947 } else { 2948 /* determine buffer space needed for message */ 2949 nz = 0; 2950 for (i=0; i<m; i++) { 2951 nz += ourlens[i]; 2952 } 2953 ierr = 
PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2954 2955 /* receive message of column indices*/ 2956 mpicnt = PetscMPIIntCast(nz);CHKERRQ(ierr); 2957 ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2958 ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr); 2959 if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 2960 else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);} 2961 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 2962 } 2963 2964 /* determine column ownership if matrix is not square */ 2965 if (N != M) { 2966 n = N/size + ((N % size) > rank); 2967 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2968 cstart = cend - n; 2969 } else { 2970 cstart = rstart; 2971 cend = rend; 2972 n = cend - cstart; 2973 } 2974 2975 /* loop over local rows, determining number of off diagonal entries */ 2976 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2977 jj = 0; 2978 for (i=0; i<m; i++) { 2979 for (j=0; j<ourlens[i]; j++) { 2980 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2981 jj++; 2982 } 2983 } 2984 2985 /* create our matrix */ 2986 for (i=0; i<m; i++) { 2987 ourlens[i] -= offlens[i]; 2988 } 2989 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2990 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2991 ierr = MatSetType(A,type);CHKERRQ(ierr); 2992 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2993 2994 for (i=0; i<m; i++) { 2995 ourlens[i] += offlens[i]; 2996 } 2997 2998 if (!rank) { 2999 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3000 3001 /* read in my part of the matrix numerical values */ 3002 nz = procsnz[0]; 3003 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3004 3005 /* insert 
into matrix */ 3006 jj = rstart; 3007 smycols = mycols; 3008 svals = vals; 3009 for (i=0; i<m; i++) { 3010 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3011 smycols += ourlens[i]; 3012 svals += ourlens[i]; 3013 jj++; 3014 } 3015 3016 /* read in other processors and ship out */ 3017 for (i=1; i<size; i++) { 3018 nz = procsnz[i]; 3019 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3020 mpicnt = PetscMPIIntCast(nz); 3021 ierr = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 3022 } 3023 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3024 } else { 3025 /* receive numeric values */ 3026 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3027 3028 /* receive message of values*/ 3029 mpicnt = PetscMPIIntCast(nz); 3030 ierr = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 3031 ierr = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr); 3032 if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 3033 else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);} 3034 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 3035 3036 /* insert into matrix */ 3037 jj = rstart; 3038 smycols = mycols; 3039 svals = vals; 3040 for (i=0; i<m; i++) { 3041 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3042 smycols += ourlens[i]; 3043 svals += ourlens[i]; 3044 jj++; 3045 } 3046 } 3047 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3048 ierr = PetscFree(vals);CHKERRQ(ierr); 3049 ierr = PetscFree(mycols);CHKERRQ(ierr); 3050 ierr = PetscFree(rowners);CHKERRQ(ierr); 3051 3052 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3053 ierr = 
MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3054 *newmat = A; 3055 PetscFunctionReturn(0); 3056 } 3057 3058 #undef __FUNCT__ 3059 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3060 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3061 { 3062 PetscErrorCode ierr; 3063 IS iscol_local; 3064 PetscInt csize; 3065 3066 PetscFunctionBegin; 3067 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3068 if (call == MAT_REUSE_MATRIX) { 3069 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3070 if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3071 } else { 3072 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3073 } 3074 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3075 if (call == MAT_INITIAL_MATRIX) { 3076 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3077 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 3078 } 3079 PetscFunctionReturn(0); 3080 } 3081 3082 #undef __FUNCT__ 3083 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3084 /* 3085 Not great since it makes two copies of the submatrix, first an SeqAIJ 3086 in local and then by concatenating the local matrices the end result. 3087 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3088 3089 Note: This requires a sequential iscol with all indices. 
3090 */ 3091 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3092 { 3093 PetscErrorCode ierr; 3094 PetscMPIInt rank,size; 3095 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3096 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3097 Mat *local,M,Mreuse; 3098 MatScalar *vwork,*aa; 3099 MPI_Comm comm = ((PetscObject)mat)->comm; 3100 Mat_SeqAIJ *aij; 3101 3102 3103 PetscFunctionBegin; 3104 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3105 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3106 3107 if (call == MAT_REUSE_MATRIX) { 3108 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3109 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3110 local = &Mreuse; 3111 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3112 } else { 3113 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3114 Mreuse = *local; 3115 ierr = PetscFree(local);CHKERRQ(ierr); 3116 } 3117 3118 /* 3119 m - number of local rows 3120 n - number of columns (same on all processors) 3121 rstart - first row in new global matrix generated 3122 */ 3123 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3124 if (call == MAT_INITIAL_MATRIX) { 3125 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3126 ii = aij->i; 3127 jj = aij->j; 3128 3129 /* 3130 Determine the number of non-zeros in the diagonal and off-diagonal 3131 portions of the matrix in order to do correct preallocation 3132 */ 3133 3134 /* first get start and end of "diagonal" columns */ 3135 if (csize == PETSC_DECIDE) { 3136 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3137 if (mglobal == n) { /* square matrix */ 3138 nlocal = m; 3139 } else { 3140 nlocal = n/size + ((n % size) > rank); 3141 } 3142 } else { 3143 nlocal = csize; 3144 } 3145 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3146 rstart = 
rend - nlocal; 3147 if (rank == size - 1 && rend != n) { 3148 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3149 } 3150 3151 /* next, compute all the lengths */ 3152 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3153 olens = dlens + m; 3154 for (i=0; i<m; i++) { 3155 jend = ii[i+1] - ii[i]; 3156 olen = 0; 3157 dlen = 0; 3158 for (j=0; j<jend; j++) { 3159 if (*jj < rstart || *jj >= rend) olen++; 3160 else dlen++; 3161 jj++; 3162 } 3163 olens[i] = olen; 3164 dlens[i] = dlen; 3165 } 3166 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3167 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3168 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3169 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3170 ierr = PetscFree(dlens);CHKERRQ(ierr); 3171 } else { 3172 PetscInt ml,nl; 3173 3174 M = *newmat; 3175 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3176 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3177 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3178 /* 3179 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3180 rather than the slower MatSetValues(). 
3181 */ 3182 M->was_assembled = PETSC_TRUE; 3183 M->assembled = PETSC_FALSE; 3184 } 3185 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3186 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3187 ii = aij->i; 3188 jj = aij->j; 3189 aa = aij->a; 3190 for (i=0; i<m; i++) { 3191 row = rstart + i; 3192 nz = ii[i+1] - ii[i]; 3193 cwork = jj; jj += nz; 3194 vwork = aa; aa += nz; 3195 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3196 } 3197 3198 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3199 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3200 *newmat = M; 3201 3202 /* save submatrix used in processor for next request */ 3203 if (call == MAT_INITIAL_MATRIX) { 3204 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3205 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 3206 } 3207 3208 PetscFunctionReturn(0); 3209 } 3210 3211 EXTERN_C_BEGIN 3212 #undef __FUNCT__ 3213 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3214 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3215 { 3216 PetscInt m,cstart, cend,j,nnz,i,d; 3217 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3218 const PetscInt *JJ; 3219 PetscScalar *values; 3220 PetscErrorCode ierr; 3221 3222 PetscFunctionBegin; 3223 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3224 3225 ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3226 ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3227 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 3228 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 3229 m = B->rmap->n; 3230 cstart = B->cmap->rstart; 3231 cend = B->cmap->rend; 3232 rstart = B->rmap->rstart; 3233 3234 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 3235 o_nnz = d_nnz + m; 3236 3237 #if defined(PETSC_USE_DEBUGGING) 3238 for (i=0; i<m; i++) { 3239 nnz = 
Ii[i+1]- Ii[i]; 3240 JJ = J + Ii[i]; 3241 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3242 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3243 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3244 for (j=1; j<nnz; j++) { 3245 if (JJ[i] <= JJ[i-1]) SETERRRQ(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at %D location in column indices",i,j); 3246 } 3247 } 3248 #endif 3249 3250 for (i=0; i<m; i++) { 3251 nnz = Ii[i+1]- Ii[i]; 3252 JJ = J + Ii[i]; 3253 nnz_max = PetscMax(nnz_max,nnz); 3254 for (j=0; j<nnz; j++) { 3255 if (*JJ >= cstart) break; 3256 JJ++; 3257 } 3258 d = 0; 3259 for (; j<nnz; j++) { 3260 if (*JJ++ >= cend) break; 3261 d++; 3262 } 3263 d_nnz[i] = d; 3264 o_nnz[i] = nnz - d; 3265 } 3266 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3267 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 3268 3269 if (v) values = (PetscScalar*)v; 3270 else { 3271 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3272 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3273 } 3274 3275 for (i=0; i<m; i++) { 3276 ii = i + rstart; 3277 nnz = Ii[i+1]- Ii[i]; 3278 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3279 } 3280 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3281 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3282 3283 if (!v) { 3284 ierr = PetscFree(values);CHKERRQ(ierr); 3285 } 3286 PetscFunctionReturn(0); 3287 } 3288 EXTERN_C_END 3289 3290 #undef __FUNCT__ 3291 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3292 /*@ 3293 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3294 (the default parallel PETSc format). 

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero) these must be sorted for each row
-  v - optional values in the matrix

   Level: developer

   Notes:
       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of v[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

       The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
    as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]

       The column indices for each row MUST be sorted.
3333 3334 .keywords: matrix, aij, compressed row, sparse, parallel 3335 3336 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3337 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3338 @*/ 3339 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3340 { 3341 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 3342 3343 PetscFunctionBegin; 3344 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3345 if (f) { 3346 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 3347 } 3348 PetscFunctionReturn(0); 3349 } 3350 3351 #undef __FUNCT__ 3352 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3353 /*@C 3354 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3355 (the default parallel PETSc format). For good matrix assembly performance 3356 the user should preallocate the matrix storage by setting the parameters 3357 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3358 performance can be increased by more than a factor of 50. 3359 3360 Collective on MPI_Comm 3361 3362 Input Parameters: 3363 + A - the matrix 3364 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3365 (same value is used for all local rows) 3366 . d_nnz - array containing the number of nonzeros in the various rows of the 3367 DIAGONAL portion of the local submatrix (possibly different for each row) 3368 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3369 The size of this array is equal to the number of local rows, i.e 'm'. 3370 You must leave room for the diagonal entry even if it is zero. 3371 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3372 submatrix (same value is used for all local rows). 
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)), is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.  See the users manual for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows.
This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2.
i.e we are using 12+15+10=37 storage locations to store 3459 34 values. 3460 3461 When d_nnz, o_nnz parameters are specified, the storage is specified 3462 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3463 In the above case the values for d_nnz,o_nnz are: 3464 .vb 3465 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3466 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3467 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3468 .ve 3469 Here the space allocated is sum of all the above values i.e 34, and 3470 hence pre-allocation is perfect. 3471 3472 Level: intermediate 3473 3474 .keywords: matrix, aij, compressed row, sparse, parallel 3475 3476 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3477 MPIAIJ, MatGetInfo() 3478 @*/ 3479 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3480 { 3481 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3482 3483 PetscFunctionBegin; 3484 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3485 if (f) { 3486 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3487 } 3488 PetscFunctionReturn(0); 3489 } 3490 3491 #undef __FUNCT__ 3492 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3493 /*@ 3494 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3495 CSR format the local rows. 3496 3497 Collective on MPI_Comm 3498 3499 Input Parameters: 3500 + comm - MPI communicator 3501 . m - number of local rows (Cannot be PETSC_DECIDE) 3502 . n - This value should be the same as the local size used in creating the 3503 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3504 calculated if N is given) For square matrices n is almost always m. 3505 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3506 . 
N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

       The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering.. i.e for the following matrix, the input data expected is
    as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* row offsets are local to this process, so the first offset must be 0 */
  if (i[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  /* the CSR arrays are copied into the matrix here */
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
3593 - o_nnz - array containing the number of nonzeros in the various rows of the 3594 OFF-DIAGONAL portion of the local submatrix (possibly different for 3595 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3596 structure. The size of this array is equal to the number 3597 of local rows, i.e 'm'. 3598 3599 Output Parameter: 3600 . A - the matrix 3601 3602 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3603 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3604 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3605 3606 Notes: 3607 If the *_nnz parameter is given then the *_nz parameter is ignored 3608 3609 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3610 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3611 storage requirements for this matrix. 3612 3613 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3614 processor than it must be used on all processors that share the object for 3615 that argument. 3616 3617 The user MUST specify either the local or global matrix dimensions 3618 (possibly both). 3619 3620 The parallel matrix is partitioned across processors such that the 3621 first m0 rows belong to process 0, the next m1 rows belong to 3622 process 1, the next m2 rows belong to process 2 etc.. where 3623 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3624 values corresponding to [m x N] submatrix. 3625 3626 The columns are logically partitioned with the n0 columns belonging 3627 to 0th partition, the next n1 columns belonging to the next 3628 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 3629 3630 The DIAGONAL portion of the local submatrix on any given processor 3631 is the submatrix corresponding to the rows and columns m,n 3632 corresponding to the given processor. 
i.e diagonal matrix on 3633 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3634 etc. The remaining portion of the local submatrix [m x (N-n)] 3635 constitute the OFF-DIAGONAL portion. The example below better 3636 illustrates this concept. 3637 3638 For a square global matrix we define each processor's diagonal portion 3639 to be its local rows and the corresponding columns (a square submatrix); 3640 each processor's off-diagonal portion encompasses the remainder of the 3641 local matrix (a rectangular submatrix). 3642 3643 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3644 3645 When calling this routine with a single process communicator, a matrix of 3646 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3647 type of communicator, use the construction mechanism: 3648 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3649 3650 By default, this format uses inodes (identical nodes) when possible. 3651 We search for consecutive rows with the same nonzero structure, thereby 3652 reusing matrix information to achieve increased efficiency. 3653 3654 Options Database Keys: 3655 + -mat_no_inode - Do not use inodes 3656 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3657 - -mat_aij_oneindex - Internally use indexing starting at 1 3658 rather than 0. Note that when calling MatSetValues(), 3659 the user still MUST index entries starting at 0! 3660 3661 3662 Example usage: 3663 3664 Consider the following 8x8 matrix with 34 non-zero values, that is 3665 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3666 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 3667 as follows: 3668 3669 .vb 3670 1 2 0 | 0 3 0 | 0 4 3671 Proc0 0 5 6 | 7 0 0 | 8 0 3672 9 0 10 | 11 0 0 | 12 0 3673 ------------------------------------- 3674 13 0 14 | 15 16 17 | 0 0 3675 Proc1 0 18 0 | 19 20 21 | 0 0 3676 0 0 0 | 22 23 0 | 24 0 3677 ------------------------------------- 3678 Proc2 25 26 27 | 0 0 28 | 29 0 3679 30 0 0 | 31 32 33 | 0 34 3680 .ve 3681 3682 This can be represented as a collection of submatrices as: 3683 3684 .vb 3685 A B C 3686 D E F 3687 G H I 3688 .ve 3689 3690 Where the submatrices A,B,C are owned by proc0, D,E,F are 3691 owned by proc1, G,H,I are owned by proc2. 3692 3693 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3694 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3695 The 'M','N' parameters are 8,8, and have the same values on all procs. 3696 3697 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3698 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3699 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3700 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3701 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3702 matrix, ans [DF] as another SeqAIJ matrix. 3703 3704 When d_nz, o_nz parameters are specified, d_nz storage elements are 3705 allocated for every row of the local diagonal submatrix, and o_nz 3706 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3707 One way to choose d_nz and o_nz is to use the max nonzerors per local 3708 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3709 In this case, the values of d_nz,o_nz are: 3710 .vb 3711 proc0 : dnz = 2, o_nz = 2 3712 proc1 : dnz = 3, o_nz = 2 3713 proc2 : dnz = 1, o_nz = 4 3714 .ve 3715 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3716 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3717 for proc3. 
i.e we are using 12+15+10=37 storage locations to store 3718 34 values. 3719 3720 When d_nnz, o_nnz parameters are specified, the storage is specified 3721 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3722 In the above case the values for d_nnz,o_nnz are: 3723 .vb 3724 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3725 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3726 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3727 .ve 3728 Here the space allocated is sum of all the above values i.e 34, and 3729 hence pre-allocation is perfect. 3730 3731 Level: intermediate 3732 3733 .keywords: matrix, aij, compressed row, sparse, parallel 3734 3735 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3736 MPIAIJ, MatCreateMPIAIJWithArrays() 3737 @*/ 3738 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3739 { 3740 PetscErrorCode ierr; 3741 PetscMPIInt size; 3742 3743 PetscFunctionBegin; 3744 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3745 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3746 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3747 if (size > 1) { 3748 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3749 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3750 } else { 3751 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3752 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3753 } 3754 PetscFunctionReturn(0); 3755 } 3756 3757 #undef __FUNCT__ 3758 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3759 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3760 { 3761 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3762 3763 PetscFunctionBegin; 3764 *Ad = a->A; 3765 *Ao = a->B; 3766 *colmap = a->garray; 3767 PetscFunctionReturn(0); 3768 } 3769 3770 #undef __FUNCT__ 3771 #define __FUNCT__ 
"MatSetColoring_MPIAIJ"
/* Applies an ISColoring to an MPIAIJ matrix: the diagonal block a->A gets the
   local piece of the coloring directly; the off-diagonal block a->B gets a
   coloring re-indexed through the global column map a->garray. */
PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_GLOBAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion: gather the full global color
       array, then pick out the colors of the ghost columns listed in garray */
    ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    /* colors is taken over by ocoloring, freed when ocoloring is destroyed */
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    PetscInt        *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion: map the owned global columns to
       local (ghosted) indices through A->mapping, then look up their colors */
    ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      larray[i] = i + A->cmap->rstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion: same mapping applied to the
       ghost columns listed in garray */
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
  }

  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ADIC)
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
/* Set ADIC differentiation values on both the diagonal and off-diagonal blocks. */
PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
/* Set ADIFOR differentiation values (nl values per nonzero) on both blocks. */
PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge"
/*@
      MatMerge - Creates a single large PETSc matrix by concatenating sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input
Parameters:
+    comm - the communicators the parallel matrix will live on
.    inmat - the input sequential matrices
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    outmat - the parallel matrix generated

    Level: advanced

   Notes: The number of columns of the matrix in EACH processor MUST be the same.
      The input matrix inmat is destroyed by this routine (see the MatDestroy(inmat) below).

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
    if (n == PETSC_DECIDE){
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* prefix sum of the local row counts gives this process's first global row */
    ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= m;

    ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
    for (i=0;i<m;i++) {
      ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
    }
    /* This routine will ONLY return MPIAIJ type matrix */
    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  } else if (scall == MAT_REUSE_MATRIX){
    ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  /* copy the local rows into the parallel matrix; inmat is consumed below */
  for (i=0;i<m;i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatDestroy(inmat);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFileSplit"
/* Writes each process's block of rows of A, as a sequential matrix, to the
   binary file "<outfile>.<rank>". */
PetscErrorCode MatFileSplit(Mat A,char *outfile)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscInt          m,N,i,rstart,nnz;
  size_t            len;
  const PetscInt    *indx;
  PetscViewer       out;
  char              *name;
  Mat               B;
  const PetscScalar *values;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
  ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
  /* Should this be the type of the diagonal block of A?
*/ 3951 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3952 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3953 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3954 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3955 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3956 for (i=0;i<m;i++) { 3957 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3958 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3959 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3960 } 3961 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3962 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3963 3964 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 3965 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3966 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3967 sprintf(name,"%s.%d",outfile,rank); 3968 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3969 ierr = PetscFree(name); 3970 ierr = MatView(B,out);CHKERRQ(ierr); 3971 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3972 ierr = MatDestroy(B);CHKERRQ(ierr); 3973 PetscFunctionReturn(0); 3974 } 3975 3976 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3977 #undef __FUNCT__ 3978 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3979 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3980 { 3981 PetscErrorCode ierr; 3982 Mat_Merge_SeqsToMPI *merge; 3983 PetscContainer container; 3984 3985 PetscFunctionBegin; 3986 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3987 if (container) { 3988 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3989 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3990 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3991 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3992 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3993 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3994 ierr = 
PetscFree(merge->buf_ri);CHKERRQ(ierr); 3995 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3996 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3997 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3998 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3999 ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr); 4000 4001 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 4002 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4003 } 4004 ierr = PetscFree(merge);CHKERRQ(ierr); 4005 4006 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4007 PetscFunctionReturn(0); 4008 } 4009 4010 #include "../src/mat/utils/freespace.h" 4011 #include "petscbt.h" 4012 4013 #undef __FUNCT__ 4014 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4015 /*@C 4016 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4017 matrices from each processor 4018 4019 Collective on MPI_Comm 4020 4021 Input Parameters: 4022 + comm - the communicators the parallel matrix will live on 4023 . seqmat - the input sequential matrices 4024 . m - number of local rows (or PETSC_DECIDE) 4025 . n - number of local columns (or PETSC_DECIDE) 4026 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4027 4028 Output Parameter: 4029 . mpimat - the parallel matrix generated 4030 4031 Level: advanced 4032 4033 Notes: 4034 The dimensions of the sequential matrix in each processor MUST be the same. 4035 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4036 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 
4037 @*/ 4038 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4039 { 4040 PetscErrorCode ierr; 4041 MPI_Comm comm=((PetscObject)mpimat)->comm; 4042 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4043 PetscMPIInt size,rank,taga,*len_s; 4044 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4045 PetscInt proc,m; 4046 PetscInt **buf_ri,**buf_rj; 4047 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4048 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4049 MPI_Request *s_waits,*r_waits; 4050 MPI_Status *status; 4051 MatScalar *aa=a->a; 4052 MatScalar **abuf_r,*ba_i; 4053 Mat_Merge_SeqsToMPI *merge; 4054 PetscContainer container; 4055 4056 PetscFunctionBegin; 4057 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4058 4059 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4060 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4061 4062 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4063 if (container) { 4064 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4065 } 4066 bi = merge->bi; 4067 bj = merge->bj; 4068 buf_ri = merge->buf_ri; 4069 buf_rj = merge->buf_rj; 4070 4071 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4072 owners = merge->rowmap.range; 4073 len_s = merge->len_s; 4074 4075 /* send and recv matrix values */ 4076 /*-----------------------------*/ 4077 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4078 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4079 4080 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4081 for (proc=0,k=0; proc<size; proc++){ 4082 if (!len_s[proc]) continue; 4083 i = owners[proc]; 4084 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4085 k++; 4086 } 4087 4088 if (merge->nrecv) {ierr = 
MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4089 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4090 ierr = PetscFree(status);CHKERRQ(ierr); 4091 4092 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4093 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4094 4095 /* insert mat values of mpimat */ 4096 /*----------------------------*/ 4097 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4098 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 4099 nextrow = buf_ri_k + merge->nrecv; 4100 nextai = nextrow + merge->nrecv; 4101 4102 for (k=0; k<merge->nrecv; k++){ 4103 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4104 nrows = *(buf_ri_k[k]); 4105 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4106 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4107 } 4108 4109 /* set values of ba */ 4110 m = merge->rowmap.n; 4111 for (i=0; i<m; i++) { 4112 arow = owners[rank] + i; 4113 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4114 bnzi = bi[i+1] - bi[i]; 4115 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4116 4117 /* add local non-zero vals of this proc's seqmat into ba */ 4118 anzi = ai[arow+1] - ai[arow]; 4119 aj = a->j + ai[arow]; 4120 aa = a->a + ai[arow]; 4121 nextaj = 0; 4122 for (j=0; nextaj<anzi; j++){ 4123 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4124 ba_i[j] += aa[nextaj++]; 4125 } 4126 } 4127 4128 /* add received vals into ba */ 4129 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4130 /* i-th row */ 4131 if (i == *nextrow[k]) { 4132 anzi = *(nextai[k]+1) - *nextai[k]; 4133 aj = buf_rj[k] + *(nextai[k]); 4134 aa = abuf_r[k] + *(nextai[k]); 4135 nextaj = 0; 4136 for (j=0; nextaj<anzi; j++){ 4137 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4138 ba_i[j] += aa[nextaj++]; 4139 } 4140 } 4141 nextrow[k]++; nextai[k]++; 4142 } 4143 } 
4144 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4145 } 4146 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4147 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4148 4149 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4150 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4151 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 4152 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4153 PetscFunctionReturn(0); 4154 } 4155 4156 #undef __FUNCT__ 4157 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4158 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4159 { 4160 PetscErrorCode ierr; 4161 Mat B_mpi; 4162 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4163 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4164 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4165 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4166 PetscInt len,proc,*dnz,*onz; 4167 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4168 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4169 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4170 MPI_Status *status; 4171 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4172 PetscBT lnkbt; 4173 Mat_Merge_SeqsToMPI *merge; 4174 PetscContainer container; 4175 4176 PetscFunctionBegin; 4177 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4178 4179 /* make sure it is a PETSc comm */ 4180 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4181 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4182 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4183 4184 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4185 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4186 4187 /* determine row ownership */ 4188 /*---------------------------------------------------------*/ 4189 ierr = 
PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 4190 merge->rowmap.n = m; 4191 merge->rowmap.N = M; 4192 merge->rowmap.bs = 1; 4193 ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr); 4194 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4195 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4196 4197 m = merge->rowmap.n; 4198 M = merge->rowmap.N; 4199 owners = merge->rowmap.range; 4200 4201 /* determine the number of messages to send, their lengths */ 4202 /*---------------------------------------------------------*/ 4203 len_s = merge->len_s; 4204 4205 len = 0; /* length of buf_si[] */ 4206 merge->nsend = 0; 4207 for (proc=0; proc<size; proc++){ 4208 len_si[proc] = 0; 4209 if (proc == rank){ 4210 len_s[proc] = 0; 4211 } else { 4212 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4213 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4214 } 4215 if (len_s[proc]) { 4216 merge->nsend++; 4217 nrows = 0; 4218 for (i=owners[proc]; i<owners[proc+1]; i++){ 4219 if (ai[i+1] > ai[i]) nrows++; 4220 } 4221 len_si[proc] = 2*(nrows+1); 4222 len += len_si[proc]; 4223 } 4224 } 4225 4226 /* determine the number and length of messages to receive for ij-structure */ 4227 /*-------------------------------------------------------------------------*/ 4228 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4229 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4230 4231 /* post the Irecv of j-structure */ 4232 /*-------------------------------*/ 4233 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4234 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4235 4236 /* post the Isend of j-structure */ 4237 /*--------------------------------*/ 4238 ierr = 
PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 4239 sj_waits = si_waits + merge->nsend; 4240 4241 for (proc=0, k=0; proc<size; proc++){ 4242 if (!len_s[proc]) continue; 4243 i = owners[proc]; 4244 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4245 k++; 4246 } 4247 4248 /* receives and sends of j-structure are complete */ 4249 /*------------------------------------------------*/ 4250 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4251 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4252 4253 /* send and recv i-structure */ 4254 /*---------------------------*/ 4255 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4256 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4257 4258 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4259 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4260 for (proc=0,k=0; proc<size; proc++){ 4261 if (!len_s[proc]) continue; 4262 /* form outgoing message for i-structure: 4263 buf_si[0]: nrows to be sent 4264 [1:nrows]: row index (global) 4265 [nrows+1:2*nrows+1]: i-structure index 4266 */ 4267 /*-------------------------------------------*/ 4268 nrows = len_si[proc]/2 - 1; 4269 buf_si_i = buf_si + nrows+1; 4270 buf_si[0] = nrows; 4271 buf_si_i[0] = 0; 4272 nrows = 0; 4273 for (i=owners[proc]; i<owners[proc+1]; i++){ 4274 anzi = ai[i+1] - ai[i]; 4275 if (anzi) { 4276 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4277 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4278 nrows++; 4279 } 4280 } 4281 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4282 k++; 4283 buf_si += len_si[proc]; 4284 } 4285 4286 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4287 if (merge->nsend) {ierr = 
MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4288 4289 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4290 for (i=0; i<merge->nrecv; i++){ 4291 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4292 } 4293 4294 ierr = PetscFree(len_si);CHKERRQ(ierr); 4295 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4296 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4297 ierr = PetscFree(si_waits);CHKERRQ(ierr); 4298 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4299 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4300 ierr = PetscFree(status);CHKERRQ(ierr); 4301 4302 /* compute a local seq matrix in each processor */ 4303 /*----------------------------------------------*/ 4304 /* allocate bi array and free space for accumulating nonzero column info */ 4305 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4306 bi[0] = 0; 4307 4308 /* create and initialize a linked list */ 4309 nlnk = N+1; 4310 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4311 4312 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4313 len = 0; 4314 len = ai[owners[rank+1]] - ai[owners[rank]]; 4315 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4316 current_space = free_space; 4317 4318 /* determine symbolic info for each local row */ 4319 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 4320 nextrow = buf_ri_k + merge->nrecv; 4321 nextai = nextrow + merge->nrecv; 4322 for (k=0; k<merge->nrecv; k++){ 4323 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4324 nrows = *buf_ri_k[k]; 4325 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4326 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4327 } 4328 4329 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4330 len = 0; 4331 for (i=0;i<m;i++) { 4332 bnzi = 
0; 4333 /* add local non-zero cols of this proc's seqmat into lnk */ 4334 arow = owners[rank] + i; 4335 anzi = ai[arow+1] - ai[arow]; 4336 aj = a->j + ai[arow]; 4337 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4338 bnzi += nlnk; 4339 /* add received col data into lnk */ 4340 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4341 if (i == *nextrow[k]) { /* i-th row */ 4342 anzi = *(nextai[k]+1) - *nextai[k]; 4343 aj = buf_rj[k] + *nextai[k]; 4344 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4345 bnzi += nlnk; 4346 nextrow[k]++; nextai[k]++; 4347 } 4348 } 4349 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4350 4351 /* if free space is not available, make more free space */ 4352 if (current_space->local_remaining<bnzi) { 4353 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4354 nspacedouble++; 4355 } 4356 /* copy data into free space, then initialize lnk */ 4357 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4358 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4359 4360 current_space->array += bnzi; 4361 current_space->local_used += bnzi; 4362 current_space->local_remaining -= bnzi; 4363 4364 bi[i+1] = bi[i] + bnzi; 4365 } 4366 4367 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 4368 4369 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4370 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4371 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4372 4373 /* create symbolic parallel matrix B_mpi */ 4374 /*---------------------------------------*/ 4375 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4376 if (n==PETSC_DECIDE) { 4377 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4378 } else { 4379 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4380 } 4381 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4382 ierr = 
MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  /* stash the symbolic-phase products on the merge struct so the numeric
     phase (and MAT_REUSE_MATRIX calls) can reuse them */
  merge->bi        = bi;
  merge->bj        = bj;
  merge->buf_ri    = buf_ri;
  merge->buf_rj    = buf_rj;
  merge->coi       = PETSC_NULL;
  merge->coj       = PETSC_NULL;
  merge->owners_co = PETSC_NULL;

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPI"
/*
   Merges per-process SeqAIJ matrices into one parallel MPIAIJ matrix.
   For MAT_INITIAL_MATRIX the symbolic (structure-building) phase runs first;
   the numeric phase then fills in (or refills, for MAT_REUSE_MATRIX) the values.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    /* build the parallel structure once; reused on subsequent calls */
    ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMat"
/*@
     MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
  PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
  MatScalar      *aa=a->a,*ba=b->a,*cam;
  PetscScalar    *ca;
  PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    /* row i of the result has all of row i of the diagonal block (A)
       plus all of row i of the off-diagonal block (B) */
    ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++){
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
    k    = 0;
    /* merge each row keeping global column indices sorted:
       B columns mapping below cstart, then A columns (shifted by cstart),
       then the remaining B columns */
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX){
    /* structure is unchanged; only the numerical values are refreshed,
       walked in the same interleaved order as the MAT_INITIAL_MATRIX case */
    mat=(Mat_SeqAIJ*)(*A_loc)->data;
    ci = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMatCondensed"
/*@C
     MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or PETSC_NULL)

   Output Parameter:
.
A_loc - the local sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row){
    /* default: all locally owned rows */
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col){
    /* default: the columns with nonzeros on this process, in sorted global
       order: mapped B columns below cstart, local (diagonal) columns, then
       the remaining mapped B columns */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX){
    /* MatGetSubMatrices() expects an array of matrices on reuse */
    ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr = PetscFree(aloc);CHKERRQ(ierr);
  /* only destroy the index sets we created ourselves */
  if (!row){
    ierr = ISDestroy(isrowa);CHKERRQ(ierr);
  }
  if (!col){
    ierr = ISDestroy(iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)

   Output Parameter:
+    rowb, colb - index sets of rows and columns of B to extract
.    brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq;

  PetscFunctionBegin;
  /* rows of B must align with the columns of A on every process */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
    SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    /* rows of B to fetch = nonzero columns of local A, in sorted global order */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
    /* local rows of B start at position imark within the extracted rows */
    *brstart = imark;
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr = PetscFree(bseq);CHKERRQ(ierr);
  /* hand the index sets back to the caller when requested, else clean up */
  if (!rowb){
    ierr = ISDestroy(isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb){
    ierr = ISDestroy(iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols"
/*@C
    MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
.
startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4675 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4676 4677 Output Parameter: 4678 + B_oth - the sequential matrix generated 4679 4680 Level: developer 4681 4682 @*/ 4683 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth) 4684 { 4685 VecScatter_MPI_General *gen_to,*gen_from; 4686 PetscErrorCode ierr; 4687 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4688 Mat_SeqAIJ *b_oth; 4689 VecScatter ctx=a->Mvctx; 4690 MPI_Comm comm=((PetscObject)ctx)->comm; 4691 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4692 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4693 PetscScalar *rvalues,*svalues; 4694 MatScalar *b_otha,*bufa,*bufA; 4695 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4696 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4697 MPI_Status *sstatus,rstatus; 4698 PetscMPIInt jj; 4699 PetscInt *cols,sbs,rbs; 4700 PetscScalar *vals; 4701 4702 PetscFunctionBegin; 4703 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4704 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4705 } 4706 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4707 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4708 4709 gen_to = (VecScatter_MPI_General*)ctx->todata; 4710 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4711 rvalues = gen_from->values; /* holds the length of receiving row */ 4712 svalues = gen_to->values; /* holds the length of sending row */ 4713 nrecvs = gen_from->n; 4714 nsends = gen_to->n; 4715 4716 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4717 srow = gen_to->indices; /* local row 
index to be sent */ 4718 sstarts = gen_to->starts; 4719 sprocs = gen_to->procs; 4720 sstatus = gen_to->sstatus; 4721 sbs = gen_to->bs; 4722 rstarts = gen_from->starts; 4723 rprocs = gen_from->procs; 4724 rbs = gen_from->bs; 4725 4726 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4727 if (scall == MAT_INITIAL_MATRIX){ 4728 /* i-array */ 4729 /*---------*/ 4730 /* post receives */ 4731 for (i=0; i<nrecvs; i++){ 4732 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4733 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4734 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4735 } 4736 4737 /* pack the outgoing message */ 4738 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 4739 rstartsj = sstartsj + nsends +1; 4740 sstartsj[0] = 0; rstartsj[0] = 0; 4741 len = 0; /* total length of j or a array to be sent */ 4742 k = 0; 4743 for (i=0; i<nsends; i++){ 4744 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4745 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4746 for (j=0; j<nrows; j++) { 4747 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4748 for (l=0; l<sbs; l++){ 4749 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4750 rowlen[j*sbs+l] = ncols; 4751 len += ncols; 4752 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4753 } 4754 k++; 4755 } 4756 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4757 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4758 } 4759 /* recvs and sends of i-array are completed */ 4760 i = nrecvs; 4761 while (i--) { 4762 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4763 } 4764 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4765 4766 /* allocate buffers for sending j and a arrays */ 4767 ierr = 
PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4768 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4769 4770 /* create i-array of B_oth */ 4771 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4772 b_othi[0] = 0; 4773 len = 0; /* total length of j or a array to be received */ 4774 k = 0; 4775 for (i=0; i<nrecvs; i++){ 4776 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4777 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 4778 for (j=0; j<nrows; j++) { 4779 b_othi[k+1] = b_othi[k] + rowlen[j]; 4780 len += rowlen[j]; k++; 4781 } 4782 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4783 } 4784 4785 /* allocate space for j and a arrrays of B_oth */ 4786 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4787 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 4788 4789 /* j-array */ 4790 /*---------*/ 4791 /* post receives of j-array */ 4792 for (i=0; i<nrecvs; i++){ 4793 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4794 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4795 } 4796 4797 /* pack the outgoing message j-array */ 4798 k = 0; 4799 for (i=0; i<nsends; i++){ 4800 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4801 bufJ = bufj+sstartsj[i]; 4802 for (j=0; j<nrows; j++) { 4803 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4804 for (ll=0; ll<sbs; ll++){ 4805 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4806 for (l=0; l<ncols; l++){ 4807 *bufJ++ = cols[l]; 4808 } 4809 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4810 } 4811 } 4812 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4813 } 4814 4815 /* recvs and sends of j-array are completed */ 4816 i = nrecvs; 4817 while (i--) { 4818 ierr = 
MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4819 } 4820 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4821 } else if (scall == MAT_REUSE_MATRIX){ 4822 sstartsj = *startsj; 4823 rstartsj = sstartsj + nsends +1; 4824 bufa = *bufa_ptr; 4825 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4826 b_otha = b_oth->a; 4827 } else { 4828 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 4829 } 4830 4831 /* a-array */ 4832 /*---------*/ 4833 /* post receives of a-array */ 4834 for (i=0; i<nrecvs; i++){ 4835 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4836 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4837 } 4838 4839 /* pack the outgoing message a-array */ 4840 k = 0; 4841 for (i=0; i<nsends; i++){ 4842 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4843 bufA = bufa+sstartsj[i]; 4844 for (j=0; j<nrows; j++) { 4845 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4846 for (ll=0; ll<sbs; ll++){ 4847 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4848 for (l=0; l<ncols; l++){ 4849 *bufA++ = vals[l]; 4850 } 4851 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4852 } 4853 } 4854 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4855 } 4856 /* recvs and sends of a-array are completed */ 4857 i = nrecvs; 4858 while (i--) { 4859 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4860 } 4861 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4862 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4863 4864 if (scall == MAT_INITIAL_MATRIX){ 4865 /* put together the new matrix */ 4866 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4867 4868 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. 
*/ 4869 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4870 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4871 b_oth->free_a = PETSC_TRUE; 4872 b_oth->free_ij = PETSC_TRUE; 4873 b_oth->nonew = 0; 4874 4875 ierr = PetscFree(bufj);CHKERRQ(ierr); 4876 if (!startsj || !bufa_ptr){ 4877 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 4878 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4879 } else { 4880 *startsj = sstartsj; 4881 *bufa_ptr = bufa; 4882 } 4883 } 4884 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4885 PetscFunctionReturn(0); 4886 } 4887 4888 #undef __FUNCT__ 4889 #define __FUNCT__ "MatGetCommunicationStructs" 4890 /*@C 4891 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4892 4893 Not Collective 4894 4895 Input Parameters: 4896 . A - The matrix in mpiaij format 4897 4898 Output Parameter: 4899 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4900 . 
colmap - A map from global column index to local index into lvec
- multScatter - A scatter from the argument of a matrix-vector product to lvec

  Level: developer

@*/
#if defined (PETSC_USE_CTABLE)
PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
  PetscValidPointer(lvec, 2)
  PetscValidPointer(colmap, 3)
  PetscValidPointer(multScatter, 4)
  /* hand out pointers to the internal structures; the caller does not own them */
  a = (Mat_MPIAIJ *) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
EXTERN_C_END

#include "../src/mat/impls/dense/mpi/mpidense.h"

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
/*
    Computes (B'*A')' since computing B*A directly is untenable

               n                       p                          p
        (              )       (              )         (                  )
      m (      A       )  *  n (       B      )   =   m (         C       )
        (              )       (              )         (                  )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  /* form the two transposes, multiply them, then transpose the result back
     into the preallocated C */
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(At);CHKERRQ(ierr);
  ierr = MatDestroy(Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
/* Creates and preallocates the (dense) product matrix C = A*B; values are
   filled in later by MatMatMultNumeric_MPIDense_MPIAIJ(). */
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
  ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C = Cmat;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
/* Driver: symbolic phase only for MAT_INITIAL_MATRIX, then numeric phase. */
PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#if defined(PETSC_HAVE_MUMPS)
extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_PASTIX)
extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SPOOLES)
extern PetscErrorCode
MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
#endif
EXTERN_C_END

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJ()
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
/*
   Constructor for the MPIAIJ matrix type: allocates the implementation
   struct, installs the function table, and registers the type-specific
   composed operations (external solver factorizations, conversions, etc.).
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);

  ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data       = (void*)b;
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->rmap->bs   = 1;
  B->assembled  = PETSC_FALSE;
  B->mapping    = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* register external direct solver factorizations available at configure time */
#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_spooles_C",
                                           "MatGetFactor_mpiaij_spooles",
                                           MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_mumps_C",
                                           "MatGetFactor_mpiaij_mumps",
                                           MatGetFactor_mpiaij_mumps);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_PASTIX)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_pastix_C",
                                           "MatGetFactor_mpiaij_pastix",
                                           MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_superlu_dist_C",
                                           "MatGetFactor_mpiaij_superlu_dist",
                                           MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
#endif
  /* register the MPIAIJ-specific implementations of composed operations */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPIAIJ",
                                           MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPIAIJ",
                                           MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIAIJ",
                                           MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                           "MatIsTranspose_MPIAIJ",
                                           MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                           "MatMPIAIJSetPreallocation_MPIAIJ",
                                           MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
                                           "MatMPIAIJSetPreallocationCSR_MPIAIJ",
                                           MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                           "MatDiagonalScaleLocal_MPIAIJ",
                                           MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
                                           "MatConvert_MPIAIJ_MPICSRPERM",
                                           MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
                                           "MatConvert_MPIAIJ_MPICRL",
                                           MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
                                           "MatMatMult_MPIDense_MPIAIJ",
                                           MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
                                           "MatMatMultSymbolic_MPIDense_MPIAIJ",
                                           MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
                                           "MatMatMultNumeric_MPIDense_MPIAIJ",
                                           MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@
     MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.   i - row indices for "diagonal" portion of matrix
.   j - column indices
.   a - matrix values
.   oi - row indices for "off-diagonal" portion of matrix
.   oj - column indices
-   oa - matrix values

   Output Parameter:
.
mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.

       The i and j indices are 0 based

       See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix


.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
                                                                PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  /* both CSR row arrays must start at 0 (0-based indexing) */
  if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (oi[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  }
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;
  /* values go straight into the user-provided arrays; nothing to stash */
  maij->donotstash     = PETSC_TRUE;
  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscMapSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
  ierr = PetscMapSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
  ierr = PetscMapSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscMapSetUp((*mat)->cmap);CHKERRQ(ierr);

  /* wrap the caller's arrays (no copy) as the diagonal and off-diagonal blocks */
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr =
MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5192 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5193 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5194 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5195 5196 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5197 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5198 PetscFunctionReturn(0); 5199 } 5200 5201 /* 5202 Special version for direct calls from Fortran 5203 */ 5204 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5205 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5206 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5207 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5208 #endif 5209 5210 /* Change these macros so can be used in void function */ 5211 #undef CHKERRQ 5212 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5213 #undef SETERRQ2 5214 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5215 #undef SETERRQ 5216 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5217 5218 EXTERN_C_BEGIN 5219 #undef __FUNCT__ 5220 #define __FUNCT__ "matsetvaluesmpiaij_" 5221 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5222 { 5223 Mat mat = *mmat; 5224 PetscInt m = *mm, n = *mn; 5225 InsertMode addv = *maddv; 5226 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5227 PetscScalar value; 5228 PetscErrorCode ierr; 5229 5230 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5231 if (mat->insertmode == NOT_SET_VALUES) { 5232 mat->insertmode = addv; 5233 } 5234 #if defined(PETSC_USE_DEBUG) 5235 else if (mat->insertmode != addv) { 5236 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5237 } 5238 #endif 5239 { 5240 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5241 PetscInt cstart = mat->cmap->rstart,cend = 
mat->cmap->rend,row,col; 5242 PetscTruth roworiented = aij->roworiented; 5243 5244 /* Some Variables required in the macro */ 5245 Mat A = aij->A; 5246 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5247 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5248 MatScalar *aa = a->a; 5249 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5250 Mat B = aij->B; 5251 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5252 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5253 MatScalar *ba = b->a; 5254 5255 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5256 PetscInt nonew = a->nonew; 5257 MatScalar *ap1,*ap2; 5258 5259 PetscFunctionBegin; 5260 for (i=0; i<m; i++) { 5261 if (im[i] < 0) continue; 5262 #if defined(PETSC_USE_DEBUG) 5263 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5264 #endif 5265 if (im[i] >= rstart && im[i] < rend) { 5266 row = im[i] - rstart; 5267 lastcol1 = -1; 5268 rp1 = aj + ai[row]; 5269 ap1 = aa + ai[row]; 5270 rmax1 = aimax[row]; 5271 nrow1 = ailen[row]; 5272 low1 = 0; 5273 high1 = nrow1; 5274 lastcol2 = -1; 5275 rp2 = bj + bi[row]; 5276 ap2 = ba + bi[row]; 5277 rmax2 = bimax[row]; 5278 nrow2 = bilen[row]; 5279 low2 = 0; 5280 high2 = nrow2; 5281 5282 for (j=0; j<n; j++) { 5283 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5284 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5285 if (in[j] >= cstart && in[j] < cend){ 5286 col = in[j] - cstart; 5287 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5288 } else if (in[j] < 0) continue; 5289 #if defined(PETSC_USE_DEBUG) 5290 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 5291 #endif 5292 else { 5293 if (mat->was_assembled) { 5294 if (!aij->colmap) { 5295 ierr = 
CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5296 } 5297 #if defined (PETSC_USE_CTABLE) 5298 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5299 col--; 5300 #else 5301 col = aij->colmap[in[j]] - 1; 5302 #endif 5303 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5304 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5305 col = in[j]; 5306 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5307 B = aij->B; 5308 b = (Mat_SeqAIJ*)B->data; 5309 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5310 rp2 = bj + bi[row]; 5311 ap2 = ba + bi[row]; 5312 rmax2 = bimax[row]; 5313 nrow2 = bilen[row]; 5314 low2 = 0; 5315 high2 = nrow2; 5316 bm = aij->B->rmap->n; 5317 ba = b->a; 5318 } 5319 } else col = in[j]; 5320 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5321 } 5322 } 5323 } else { 5324 if (!aij->donotstash) { 5325 if (roworiented) { 5326 if (ignorezeroentries && v[i*n] == 0.0) continue; 5327 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 5328 } else { 5329 if (ignorezeroentries && v[i] == 0.0) continue; 5330 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 5331 } 5332 } 5333 } 5334 }} 5335 PetscFunctionReturnVoid(); 5336 } 5337 EXTERN_C_END 5338 5339