1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 4 #include "src/inline/spops.h" 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "MatDistribute_MPIAIJ" 8 /* 9 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 10 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 11 12 Only for square matrices 13 */ 14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 15 { 16 PetscMPIInt rank,size; 17 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 18 PetscErrorCode ierr; 19 Mat mat; 20 Mat_SeqAIJ *gmata; 21 PetscMPIInt tag; 22 MPI_Status status; 23 PetscTruth aij; 24 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 25 26 PetscFunctionBegin; 27 CHKMEMQ; 28 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 29 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 30 if (!rank) { 31 ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 32 if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 33 } 34 if (reuse == MAT_INITIAL_MATRIX) { 35 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 36 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 37 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 38 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 39 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 40 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 41 rowners[0] = 0; 42 for (i=2; i<=size; i++) { 43 rowners[i] += rowners[i-1]; 44 } 45 rstart = rowners[rank]; 46 rend = rowners[rank+1]; 47 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 48 if (!rank) { 49 gmata = (Mat_SeqAIJ*) gmat->data; 50 /* send row lengths to all processors */ 51 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 52 for (i=1; i<size; i++) { 53 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 54 } 55 /* determine number diagonal and off-diagonal counts */ 56 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 57 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 58 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 59 jj = 0; 60 for (i=0; i<m; i++) { 61 for (j=0; j<dlens[i]; j++) { 62 if (gmata->j[jj] < rstart) ld[i]++; 63 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 64 jj++; 65 } 66 } 67 /* send column indices to other processes */ 68 for (i=1; i<size; i++) { 69 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 70 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 71 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 72 } 73 74 /* send numerical values to other processes */ 75 for (i=1; i<size; i++) { 76 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 77 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 78 } 79 gmataa = gmata->a; 80 gmataj = gmata->j; 81 82 } else { 83 /* receive row lengths */ 84 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 85 /* receive column indices */ 86 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 87 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 88 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 89 /* determine number diagonal and off-diagonal counts */ 90 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 91 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 92 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 93 jj = 0; 94 for (i=0; i<m; i++) { 95 for (j=0; j<dlens[i]; j++) { 96 if (gmataj[jj] < rstart) ld[i]++; 97 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 98 jj++; 99 } 100 } 101 /* receive numerical values */ 102 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 104 } 105 /* set preallocation */ 106 for (i=0; i<m; i++) { 107 dlens[i] -= olens[i]; 108 } 109 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 110 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 111 112 for (i=0; i<m; i++) { 113 dlens[i] += olens[i]; 114 } 115 cnt = 0; 116 for (i=0; i<m; i++) { 117 row = rstart + i; 118 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 119 cnt += dlens[i]; 120 } 121 if (rank) { 122 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 123 } 124 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 125 ierr = PetscFree(rowners);CHKERRQ(ierr); 126 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 127 *inmat = mat; 128 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 129 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 130 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 131 mat = *inmat; 132 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 133 if (!rank) { 134 /* send numerical values to other processes */ 135 gmata = (Mat_SeqAIJ*) gmat->data; 136 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 137 gmataa = gmata->a; 138 for (i=1; i<size; i++) { 139 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 140 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 141 } 142 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 143 } else { 144 /* receive numerical values from process 0*/ 145 nz = Ad->nz + Ao->nz; 146 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 147 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 148 } 149 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 150 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 151 ad = Ad->a; 152 ao = Ao->a; 153 if (mat->rmap.n) { 154 i = 0; 155 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 156 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 157 } 158 for (i=1; i<mat->rmap.n; i++) { 159 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 160 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 161 } 162 i--; 163 if (mat->rmap.n) { 164 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 165 } 166 if (rank) { 167 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 168 } 169 } 170 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 171 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 172 CHKMEMQ; 173 PetscFunctionReturn(0); 174 } 175 176 /* 177 Local utility routine that creates a mapping from the global column 178 number to the local number in the off-diagonal part of the local 179 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 180 a slightly higher hash table cost; without it it is not scalable (each processor 181 has an order N integer array but is fast to acess. 182 */ 183 #undef __FUNCT__ 184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 186 { 187 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 188 PetscErrorCode ierr; 189 PetscInt n = aij->B->cmap.n,i; 190 191 PetscFunctionBegin; 192 #if defined (PETSC_USE_CTABLE) 193 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 194 for (i=0; i<n; i++){ 195 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 196 } 197 #else 198 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 199 ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr); 200 ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr); 201 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 202 #endif 203 PetscFunctionReturn(0); 204 } 205 206 207 #define CHUNKSIZE 15 208 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 209 { \ 210 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 211 lastcol1 = col;\ 212 while (high1-low1 > 5) { \ 213 t = (low1+high1)/2; \ 214 if (rp1[t] > col) high1 = t; \ 215 else low1 = t; \ 216 } \ 217 for (_i=low1; _i<high1; _i++) { \ 218 if (rp1[_i] > col) break; \ 219 if (rp1[_i] == col) { \ 220 if (addv == ADD_VALUES) ap1[_i] += value; \ 221 else ap1[_i] = value; \ 222 goto a_noinsert; \ 223 } \ 224 } \ 225 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 226 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 227 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 228 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 229 N = nrow1++ - 1; a->nz++; high1++; \ 230 /* shift up all the later entries in this row */ \ 231 for (ii=N; ii>=_i; ii--) { \ 232 rp1[ii+1] = rp1[ii]; \ 233 ap1[ii+1] = ap1[ii]; \ 234 } \ 235 rp1[_i] = col; \ 236 ap1[_i] = value; \ 237 a_noinsert: ; \ 238 ailen[row] = nrow1; \ 239 } 240 241 242 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 243 { \ 244 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 245 lastcol2 = col;\ 246 while (high2-low2 > 5) { \ 247 t = (low2+high2)/2; \ 248 if (rp2[t] > col) high2 = t; \ 249 else low2 = t; \ 250 } \ 251 for (_i=low2; _i<high2; _i++) { \ 252 if (rp2[_i] > col) break; \ 253 if (rp2[_i] == col) { \ 254 if (addv == ADD_VALUES) ap2[_i] += value; \ 255 else ap2[_i] = value; \ 256 goto b_noinsert; \ 257 } \ 258 } \ 259 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 260 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 261 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 262 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 263 N = nrow2++ - 1; b->nz++; high2++; \ 264 /* shift up all the later entries in this row */ \ 265 for (ii=N; ii>=_i; ii--) { \ 266 rp2[ii+1] = rp2[ii]; \ 267 ap2[ii+1] = ap2[ii]; \ 268 } \ 269 rp2[_i] = col; \ 270 ap2[_i] = value; \ 271 b_noinsert: ; \ 272 bilen[row] = nrow2; \ 273 } 274 275 #undef __FUNCT__ 276 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 277 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 278 { 279 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 280 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 281 PetscErrorCode ierr; 282 PetscInt l,*garray = mat->garray,diag; 283 284 PetscFunctionBegin; 285 /* code only works for square matrices A */ 286 287 /* find size of row to the left of the diagonal part */ 288 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 289 row = row - diag; 290 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 291 if (garray[b->j[b->i[row]+l]] > diag) break; 292 } 293 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 294 295 /* diagonal part */ 296 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 297 298 /* right of diagonal part */ 299 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 300 PetscFunctionReturn(0); 301 } 302 303 #undef __FUNCT__ 304 #define __FUNCT__ "MatSetValues_MPIAIJ" 305 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 306 { 307 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 308 PetscScalar value; 309 PetscErrorCode ierr; 310 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 311 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 312 PetscTruth roworiented = aij->roworiented; 313 314 /* Some Variables required in the macro */ 315 Mat A = aij->A; 316 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 317 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 318 MatScalar *aa = a->a; 319 PetscTruth ignorezeroentries = a->ignorezeroentries; 320 Mat B = aij->B; 321 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 322 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n; 323 MatScalar *ba = b->a; 324 325 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 326 PetscInt nonew = a->nonew; 327 MatScalar *ap1,*ap2; 328 329 PetscFunctionBegin; 330 for (i=0; i<m; i++) { 331 if (im[i] < 0) continue; 332 #if defined(PETSC_USE_DEBUG) 333 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 334 #endif 335 if (im[i] >= rstart && im[i] < rend) { 336 row = im[i] - rstart; 337 lastcol1 = -1; 338 rp1 = aj + ai[row]; 339 ap1 = aa + ai[row]; 340 rmax1 = aimax[row]; 341 nrow1 = ailen[row]; 342 low1 = 0; 343 high1 = nrow1; 344 lastcol2 = -1; 345 rp2 = bj + bi[row]; 346 ap2 = ba + bi[row]; 347 rmax2 = bimax[row]; 348 nrow2 = bilen[row]; 349 low2 = 0; 350 high2 = nrow2; 351 352 for (j=0; j<n; j++) { 353 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 354 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 355 if (in[j] >= cstart && in[j] < cend){ 356 col = in[j] - cstart; 357 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 358 } else if (in[j] < 0) continue; 359 #if defined(PETSC_USE_DEBUG) 360 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);} 361 #endif 362 else { 363 if (mat->was_assembled) { 364 if (!aij->colmap) { 365 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 366 } 367 #if defined (PETSC_USE_CTABLE) 368 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 369 col--; 370 #else 371 col = aij->colmap[in[j]] - 1; 372 #endif 373 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 374 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 375 col = in[j]; 376 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 377 B = aij->B; 378 b = (Mat_SeqAIJ*)B->data; 379 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 380 rp2 = bj + bi[row]; 381 ap2 = ba + bi[row]; 382 rmax2 = bimax[row]; 383 nrow2 = bilen[row]; 384 low2 = 0; 385 high2 = nrow2; 386 bm = aij->B->rmap.n; 387 ba = b->a; 388 } 389 } else col = in[j]; 390 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 391 } 392 } 393 } else { 394 if (!aij->donotstash) { 395 if (roworiented) { 396 if (ignorezeroentries && v[i*n] == 0.0) continue; 397 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 398 } else { 399 if (ignorezeroentries && v[i] == 0.0) continue; 400 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 401 } 402 } 403 } 404 } 405 PetscFunctionReturn(0); 406 } 407 408 #undef __FUNCT__ 409 #define __FUNCT__ "MatGetValues_MPIAIJ" 410 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 411 { 412 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 413 PetscErrorCode ierr; 414 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 415 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 416 417 PetscFunctionBegin; 418 for (i=0; i<m; i++) { 419 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 420 if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1); 421 if (idxm[i] >= rstart && idxm[i] < rend) { 422 row = idxm[i] - rstart; 423 for (j=0; j<n; j++) { 424 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 425 if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1); 426 if (idxn[j] >= cstart && idxn[j] < cend){ 427 col = idxn[j] - cstart; 428 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 429 } else { 430 if (!aij->colmap) { 431 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 432 } 433 #if defined (PETSC_USE_CTABLE) 434 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 435 col --; 436 #else 437 col = aij->colmap[idxn[j]] - 1; 438 #endif 439 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 440 else { 441 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 442 } 443 } 444 } 445 } else { 446 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 447 } 448 } 449 PetscFunctionReturn(0); 450 } 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 455 { 456 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 457 PetscErrorCode ierr; 458 PetscInt nstash,reallocs; 459 InsertMode addv; 460 461 PetscFunctionBegin; 462 if (aij->donotstash) { 463 PetscFunctionReturn(0); 464 } 465 466 /* make sure all processors are either in INSERTMODE or ADDMODE */ 467 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 468 if (addv == (ADD_VALUES|INSERT_VALUES)) { 469 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 470 } 471 mat->insertmode = addv; /* in case this processor had no cache */ 472 473 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr); 474 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 475 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 476 PetscFunctionReturn(0); 477 } 478 479 #undef __FUNCT__ 480 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 481 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 482 { 483 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 484 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 485 PetscErrorCode ierr; 486 PetscMPIInt n; 487 PetscInt i,j,rstart,ncols,flg; 488 PetscInt *row,*col; 489 PetscTruth other_disassembled; 490 PetscScalar *val; 491 InsertMode addv = mat->insertmode; 492 493 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 494 PetscFunctionBegin; 495 if (!aij->donotstash) { 496 while (1) { 497 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 498 if (!flg) break; 499 500 for (i=0; i<n;) { 501 /* Now identify the consecutive vals belonging to the same row */ 502 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 503 if (j < n) ncols = j-i; 504 else ncols = n-i; 505 /* Now assemble all these values with a single function call */ 506 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 507 i = j; 508 } 509 } 510 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 511 } 512 a->compressedrow.use = PETSC_FALSE; 513 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 514 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 515 516 /* determine if any processor has disassembled, if so we must 517 also disassemble ourselfs, in order that we may reassemble. */ 518 /* 519 if nonzero structure of submatrix B cannot change then we know that 520 no processor disassembled thus we can skip this stuff 521 */ 522 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 523 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 524 if (mat->was_assembled && !other_disassembled) { 525 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 526 } 527 } 528 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 529 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 530 } 531 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 532 ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 533 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 534 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 535 536 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 537 aij->rowvalues = 0; 538 539 /* used by MatAXPY() */ 540 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 541 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 542 543 PetscFunctionReturn(0); 544 } 545 546 #undef __FUNCT__ 547 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 548 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 549 { 550 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 551 PetscErrorCode ierr; 552 553 PetscFunctionBegin; 554 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 555 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 556 PetscFunctionReturn(0); 557 } 558 559 #undef __FUNCT__ 560 #define __FUNCT__ "MatZeroRows_MPIAIJ" 561 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 562 { 563 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 564 PetscErrorCode ierr; 565 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 566 PetscInt i,*owners = A->rmap.range; 567 PetscInt *nprocs,j,idx,nsends,row; 568 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 569 PetscInt *rvalues,count,base,slen,*source; 570 PetscInt *lens,*lrows,*values,rstart=A->rmap.rstart; 571 MPI_Comm comm = ((PetscObject)A)->comm; 572 MPI_Request *send_waits,*recv_waits; 573 MPI_Status recv_status,*send_status; 574 #if defined(PETSC_DEBUG) 575 PetscTruth found = PETSC_FALSE; 576 #endif 577 578 PetscFunctionBegin; 579 /* first count number of contributors to each processor */ 580 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 581 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 582 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 583 j = 0; 584 for (i=0; i<N; i++) { 585 if (lastidx > (idx = rows[i])) j = 0; 586 lastidx = idx; 587 for (; j<size; j++) { 588 if (idx >= owners[j] && idx < owners[j+1]) { 589 nprocs[2*j]++; 590 nprocs[2*j+1] = 1; 591 owner[i] = j; 592 #if defined(PETSC_DEBUG) 593 found = PETSC_TRUE; 594 #endif 595 break; 596 } 597 } 598 #if defined(PETSC_DEBUG) 599 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 600 found = PETSC_FALSE; 601 #endif 602 } 603 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 604 605 /* inform other processors of number of messages and max length*/ 606 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 607 608 /* post receives: */ 609 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 610 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 611 for (i=0; i<nrecvs; i++) { 612 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 613 } 614 615 /* do sends: 616 1) starts[i] gives the starting index in svalues for stuff going to 617 the ith processor 618 */ 619 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 620 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 621 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 622 starts[0] = 0; 623 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 624 for (i=0; i<N; i++) { 625 svalues[starts[owner[i]]++] = rows[i]; 626 } 627 628 starts[0] = 0; 629 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 630 count = 0; 631 for (i=0; i<size; i++) { 632 if (nprocs[2*i+1]) { 633 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 634 } 635 } 636 ierr = PetscFree(starts);CHKERRQ(ierr); 637 638 base = owners[rank]; 639 640 /* wait on receives */ 641 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 642 source = lens + nrecvs; 643 count = nrecvs; slen = 0; 644 while (count) { 645 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 646 /* unpack receives into our local space */ 647 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 648 source[imdex] = recv_status.MPI_SOURCE; 649 lens[imdex] = n; 650 slen += n; 651 count--; 652 } 653 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 654 655 /* move the data into the send scatter */ 656 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 657 count = 0; 658 for (i=0; i<nrecvs; i++) { 659 values = rvalues + i*nmax; 660 for (j=0; j<lens[i]; j++) { 661 lrows[count++] = values[j] - base; 662 } 663 } 664 ierr = PetscFree(rvalues);CHKERRQ(ierr); 665 ierr = PetscFree(lens);CHKERRQ(ierr); 666 ierr = PetscFree(owner);CHKERRQ(ierr); 667 ierr = PetscFree(nprocs);CHKERRQ(ierr); 668 669 /* actually zap the local rows */ 670 /* 671 Zero the required rows. If the "diagonal block" of the matrix 672 is square and the user wishes to set the diagonal we use separate 673 code so that MatSetValues() is not called for each diagonal allocating 674 new memory, thus calling lots of mallocs and slowing things down. 675 676 Contributed by: Matthew Knepley 677 */ 678 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 679 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 680 if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) { 681 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 682 } else if (diag != 0.0) { 683 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 684 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 685 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 686 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 687 } 688 for (i = 0; i < slen; i++) { 689 row = lrows[i] + rstart; 690 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 691 } 692 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 693 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 694 } else { 695 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 696 } 697 ierr = PetscFree(lrows);CHKERRQ(ierr); 698 699 /* wait on sends */ 700 if (nsends) { 701 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 702 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 703 ierr = PetscFree(send_status);CHKERRQ(ierr); 704 } 705 ierr = PetscFree(send_waits);CHKERRQ(ierr); 706 ierr = PetscFree(svalues);CHKERRQ(ierr); 707 708 PetscFunctionReturn(0); 709 } 710 711 #undef __FUNCT__ 712 #define __FUNCT__ "MatMult_MPIAIJ" 713 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 714 { 715 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 716 PetscErrorCode ierr; 717 PetscInt nt; 718 719 PetscFunctionBegin; 720 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 721 if (nt != A->cmap.n) { 722 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt); 723 } 724 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 725 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 726 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 727 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 728 PetscFunctionReturn(0); 729 } 730 731 #undef __FUNCT__ 732 #define __FUNCT__ "MatMultAdd_MPIAIJ" 733 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 734 { 735 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 736 PetscErrorCode ierr; 737 738 PetscFunctionBegin; 739 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 740 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 741 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 742 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 743 PetscFunctionReturn(0); 744 } 745 746 #undef __FUNCT__ 747 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 748 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 749 { 750 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 751 PetscErrorCode ierr; 752 PetscTruth merged; 753 754 PetscFunctionBegin; 755 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 756 /* do nondiagonal part */ 757 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 758 if (!merged) { 759 /* send it on its way */ 760 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 761 /* do local part */ 762 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 763 /* receive remote parts: note this assumes the values are not actually */ 764 /* added in yy until the next line, */ 765 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 766 } else { 767 /* do local part */ 768 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 769 /* send it on its way */ 770 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 771 /* values actually were received in the Begin() but we need to call this nop */ 772 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 773 } 774 PetscFunctionReturn(0); 775 } 776 777 EXTERN_C_BEGIN 778 #undef __FUNCT__ 779 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 780 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 781 { 782 MPI_Comm comm; 783 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 784 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 785 IS Me,Notme; 786 PetscErrorCode ierr; 787 PetscInt M,N,first,last,*notme,i; 788 PetscMPIInt size; 789 790 PetscFunctionBegin; 791 792 /* Easy test: symmetric diagonal block */ 793 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 794 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 795 if (!*f) PetscFunctionReturn(0); 796 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 797 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 798 if (size == 1) PetscFunctionReturn(0); 799 800 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 801 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 802 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 803 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 804 for (i=0; i<first; i++) notme[i] = i; 805 for (i=last; i<M; i++) notme[i-last+first] = i; 806 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 807 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 808 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 809 Aoff = Aoffs[0]; 810 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 811 Boff = Boffs[0]; 812 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 813 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 814 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 815 ierr = ISDestroy(Me);CHKERRQ(ierr); 816 ierr = ISDestroy(Notme);CHKERRQ(ierr); 817 818 PetscFunctionReturn(0); 819 } 820 EXTERN_C_END 821 822 #undef __FUNCT__ 823 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 824 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 825 { 826 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 827 PetscErrorCode ierr; 828 829 PetscFunctionBegin; 830 /* do nondiagonal part */ 831 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 832 /* send it on its way */ 833 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 834 /* do local part */ 835 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 836 /* receive remote parts */ 837 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 838 PetscFunctionReturn(0); 839 } 840 841 /* 842 This only works correctly for square matrices where the subblock A->A is the 843 diagonal block 844 */ 845 #undef __FUNCT__ 846 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 847 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 848 { 849 PetscErrorCode ierr; 850 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 851 852 PetscFunctionBegin; 853 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 854 if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) { 855 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 856 } 857 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 858 PetscFunctionReturn(0); 859 } 860 861 #undef __FUNCT__ 862 #define __FUNCT__ "MatScale_MPIAIJ" 863 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 864 { 865 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 866 PetscErrorCode ierr; 867 868 PetscFunctionBegin; 869 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 870 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 871 PetscFunctionReturn(0); 872 } 873 874 #undef __FUNCT__ 875 #define __FUNCT__ "MatDestroy_MPIAIJ" 876 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 877 { 878 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 879 PetscErrorCode ierr; 880 881 PetscFunctionBegin; 882 #if defined(PETSC_USE_LOG) 883 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N); 884 #endif 885 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 886 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 887 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 888 #if defined (PETSC_USE_CTABLE) 889 if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);} 890 #else 891 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 892 #endif 893 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 894 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 895 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 896 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 897 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 898 ierr = PetscFree(aij);CHKERRQ(ierr); 899 900 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 901 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 902 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 903 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 904 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 905 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 906 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 907 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 908 PetscFunctionReturn(0); 909 } 910 911 #undef __FUNCT__ 912 #define __FUNCT__ "MatView_MPIAIJ_Binary" 913 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 914 { 915 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 916 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 917 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 918 PetscErrorCode ierr; 919 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 920 int fd; 921 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 922 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz; 923 PetscScalar *column_values; 924 925 PetscFunctionBegin; 926 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 927 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 928 nz = A->nz + B->nz; 929 if (!rank) { 930 header[0] = MAT_FILE_COOKIE; 931 header[1] = mat->rmap.N; 932 header[2] = mat->cmap.N; 933 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 934 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 935 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 936 /* get largest number of rows any processor has */ 937 rlen = mat->rmap.n; 938 range = mat->rmap.range; 939 for (i=1; i<size; i++) { 940 rlen = PetscMax(rlen,range[i+1] - range[i]); 941 } 942 } else { 943 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 944 rlen = mat->rmap.n; 945 } 946 947 /* load up the local row counts */ 948 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 949 for (i=0; i<mat->rmap.n; i++) { 950 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 951 } 952 953 /* store the row lengths to the file */ 954 if (!rank) { 955 MPI_Status status; 956 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 957 for (i=1; i<size; i++) { 958 rlen = range[i+1] - range[i]; 959 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 960 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 961 } 962 } else { 963 ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 964 } 965 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 966 967 /* load up the local column indices */ 968 nzmax = nz; /* )th processor needs space a largest processor needs */ 969 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 970 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 971 cnt = 0; 972 for (i=0; i<mat->rmap.n; i++) { 973 for (j=B->i[i]; j<B->i[i+1]; j++) { 974 if ( (col = garray[B->j[j]]) > cstart) break; 975 column_indices[cnt++] = col; 976 } 977 for (k=A->i[i]; k<A->i[i+1]; k++) { 978 column_indices[cnt++] = A->j[k] + cstart; 979 } 980 for (; j<B->i[i+1]; j++) { 981 column_indices[cnt++] = garray[B->j[j]]; 982 } 983 } 984 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 985 986 /* store the column indices to the file */ 987 if (!rank) { 988 MPI_Status status; 989 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 990 for (i=1; i<size; i++) { 991 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 992 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 993 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 994 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 995 } 996 } else { 997 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 998 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 999 } 1000 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1001 1002 /* load up the local column values */ 1003 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1004 cnt = 0; 1005 for (i=0; i<mat->rmap.n; i++) { 1006 for (j=B->i[i]; j<B->i[i+1]; j++) { 1007 if ( garray[B->j[j]] > cstart) break; 1008 column_values[cnt++] = B->a[j]; 1009 } 1010 for (k=A->i[i]; k<A->i[i+1]; k++) { 1011 column_values[cnt++] = A->a[k]; 1012 } 1013 for (; j<B->i[i+1]; j++) { 1014 column_values[cnt++] = B->a[j]; 1015 } 1016 } 1017 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1018 1019 /* store the column values to the file */ 1020 if (!rank) { 1021 MPI_Status status; 1022 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1023 for (i=1; i<size; i++) { 1024 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1025 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1026 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1027 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1028 } 1029 } else { 1030 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1031 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1032 } 1033 ierr = PetscFree(column_values);CHKERRQ(ierr); 1034 PetscFunctionReturn(0); 1035 } 1036 1037 #undef __FUNCT__ 1038 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1039 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1040 { 1041 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1042 PetscErrorCode ierr; 1043 PetscMPIInt rank = aij->rank,size = aij->size; 1044 PetscTruth isdraw,iascii,isbinary; 1045 PetscViewer sviewer; 1046 PetscViewerFormat format; 1047 1048 PetscFunctionBegin; 1049 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1050 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1051 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1052 if (iascii) { 1053 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1054 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1055 MatInfo info; 1056 PetscTruth inodes; 1057 1058 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1059 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1060 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1061 if (!inodes) { 1062 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1063 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1064 } else { 1065 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1066 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1067 } 1068 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1069 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1070 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1071 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1072 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1073 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1074 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1075 PetscFunctionReturn(0); 1076 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1077 PetscInt inodecount,inodelimit,*inodes; 1078 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1079 if (inodes) { 1080 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1081 } else { 1082 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1083 } 1084 PetscFunctionReturn(0); 1085 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1086 PetscFunctionReturn(0); 1087 } 1088 } else if (isbinary) { 1089 if (size == 1) { 1090 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1091 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1092 } else { 1093 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1094 } 1095 PetscFunctionReturn(0); 1096 } else if (isdraw) { 1097 PetscDraw draw; 1098 PetscTruth isnull; 1099 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1100 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1101 } 1102 1103 if (size == 1) { 1104 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1105 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1106 } else { 1107 /* assemble the entire matrix onto first processor. */ 1108 Mat A; 1109 Mat_SeqAIJ *Aloc; 1110 PetscInt M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct; 1111 MatScalar *a; 1112 1113 if (mat->rmap.N > 1024) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 512 rows, use binary format instead"); 1114 1115 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1116 if (!rank) { 1117 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1118 } else { 1119 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1120 } 1121 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1122 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1123 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1124 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1125 1126 /* copy over the A part */ 1127 Aloc = (Mat_SeqAIJ*)aij->A->data; 1128 m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1129 row = mat->rmap.rstart; 1130 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;} 1131 for (i=0; i<m; i++) { 1132 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1133 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1134 } 1135 aj = Aloc->j; 1136 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;} 1137 1138 /* copy over the B part */ 1139 Aloc = (Mat_SeqAIJ*)aij->B->data; 1140 m = aij->B->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1141 row = mat->rmap.rstart; 1142 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1143 ct = cols; 1144 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1145 for (i=0; i<m; i++) { 1146 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1147 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1148 } 1149 ierr = PetscFree(ct);CHKERRQ(ierr); 1150 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1151 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1152 /* 1153 Everyone has to call to draw the matrix since the graphics waits are 1154 synchronized across all processors that share the PetscDraw object 1155 */ 1156 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1157 if (!rank) { 1158 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1159 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1160 } 1161 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1162 ierr = MatDestroy(A);CHKERRQ(ierr); 1163 } 1164 PetscFunctionReturn(0); 1165 } 1166 1167 #undef __FUNCT__ 1168 #define __FUNCT__ "MatView_MPIAIJ" 1169 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1170 { 1171 PetscErrorCode ierr; 1172 PetscTruth iascii,isdraw,issocket,isbinary; 1173 1174 PetscFunctionBegin; 1175 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1176 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1177 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1178 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1179 if (iascii || isdraw || isbinary || issocket) { 1180 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1181 } else { 1182 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1183 } 1184 PetscFunctionReturn(0); 1185 } 1186 1187 #undef __FUNCT__ 1188 #define __FUNCT__ "MatRelax_MPIAIJ" 1189 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1190 { 1191 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1192 PetscErrorCode ierr; 1193 Vec bb1; 1194 1195 PetscFunctionBegin; 1196 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1197 1198 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1199 if (flag & SOR_ZERO_INITIAL_GUESS) { 1200 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1201 its--; 1202 } 1203 1204 while (its--) { 1205 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1206 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1207 1208 /* update rhs: bb1 = bb - B*x */ 1209 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1210 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1211 1212 /* local sweep */ 1213 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 1214 } 1215 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1216 if (flag & SOR_ZERO_INITIAL_GUESS) { 1217 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1218 its--; 1219 } 1220 while (its--) { 1221 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1222 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1223 1224 /* update rhs: bb1 = bb - B*x */ 1225 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1226 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1227 1228 /* local sweep */ 1229 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1230 } 1231 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1232 if (flag & SOR_ZERO_INITIAL_GUESS) { 1233 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1234 its--; 1235 } 1236 while (its--) { 1237 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1238 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1239 1240 /* update rhs: bb1 = bb - B*x */ 1241 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1242 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1243 1244 /* local sweep */ 1245 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1246 } 1247 } else { 1248 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1249 } 1250 1251 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1252 PetscFunctionReturn(0); 1253 } 1254 1255 #undef __FUNCT__ 1256 #define __FUNCT__ "MatPermute_MPIAIJ" 1257 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1258 { 1259 MPI_Comm comm,pcomm; 1260 PetscInt first,local_size,nrows,*rows; 1261 int ntids; 1262 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1263 PetscErrorCode ierr; 1264 1265 PetscFunctionBegin; 1266 ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr); 1267 /* make a collective version of 'rowp' */ 1268 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr); 1269 if (pcomm==comm) { 1270 crowp = rowp; 1271 } else { 1272 ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr); 1273 ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr); 1274 ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr); 1275 ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr); 1276 } 1277 /* collect the global row permutation and invert it */ 1278 ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr); 1279 ierr = ISSetPermutation(growp); CHKERRQ(ierr); 1280 if (pcomm!=comm) { 1281 ierr = ISDestroy(crowp); CHKERRQ(ierr); 1282 } 1283 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1284 /* get the local target indices */ 1285 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr); 1286 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr); 1287 ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr); 1288 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr); 1289 ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr); 1290 ierr = ISDestroy(irowp); CHKERRQ(ierr); 1291 /* the column permutation is so much easier; 1292 make a local version of 'colp' and invert it */ 1293 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr); 1294 ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr); 1295 if (ntids==1) { 1296 lcolp = colp; 1297 } else { 1298 ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr); 1299 ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr); 1300 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr); 1301 } 1302 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr); 1303 ierr = ISSetPermutation(lcolp); CHKERRQ(ierr); 1304 if (ntids>1) { 1305 ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr); 1306 ierr = ISDestroy(lcolp); CHKERRQ(ierr); 1307 } 1308 /* now we just get the submatrix */ 1309 ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr); 1310 /* clean up */ 1311 ierr = ISDestroy(lrowp); CHKERRQ(ierr); 1312 ierr = ISDestroy(icolp); CHKERRQ(ierr); 1313 PetscFunctionReturn(0); 1314 } 1315 1316 #undef __FUNCT__ 1317 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1318 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1319 { 1320 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1321 Mat A = mat->A,B = mat->B; 1322 PetscErrorCode ierr; 1323 PetscReal isend[5],irecv[5]; 1324 1325 PetscFunctionBegin; 1326 info->block_size = 1.0; 1327 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1328 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1329 isend[3] = info->memory; isend[4] = info->mallocs; 1330 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1331 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1332 isend[3] += info->memory; isend[4] += info->mallocs; 1333 if (flag == MAT_LOCAL) { 1334 info->nz_used = isend[0]; 1335 info->nz_allocated = isend[1]; 1336 info->nz_unneeded = isend[2]; 1337 info->memory = isend[3]; 1338 info->mallocs = isend[4]; 1339 } else if (flag == MAT_GLOBAL_MAX) { 1340 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1341 info->nz_used = irecv[0]; 1342 info->nz_allocated = irecv[1]; 1343 info->nz_unneeded = irecv[2]; 1344 info->memory = irecv[3]; 1345 info->mallocs = irecv[4]; 1346 } else if (flag == MAT_GLOBAL_SUM) { 1347 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1348 info->nz_used = irecv[0]; 1349 info->nz_allocated = irecv[1]; 1350 info->nz_unneeded = irecv[2]; 1351 info->memory = irecv[3]; 1352 info->mallocs = irecv[4]; 1353 } 1354 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1355 info->fill_ratio_needed = 0; 1356 info->factor_mallocs = 0; 1357 info->rows_global = (double)matin->rmap.N; 1358 info->columns_global = (double)matin->cmap.N; 1359 info->rows_local = (double)matin->rmap.n; 1360 info->columns_local = (double)matin->cmap.N; 1361 1362 PetscFunctionReturn(0); 1363 } 1364 1365 #undef __FUNCT__ 1366 #define __FUNCT__ "MatSetOption_MPIAIJ" 1367 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg) 1368 { 1369 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1370 PetscErrorCode ierr; 1371 1372 PetscFunctionBegin; 1373 switch (op) { 1374 case MAT_NEW_NONZERO_LOCATIONS: 1375 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1376 case MAT_KEEP_ZEROED_ROWS: 1377 case MAT_NEW_NONZERO_LOCATION_ERR: 1378 case MAT_USE_INODES: 1379 case MAT_IGNORE_ZERO_ENTRIES: 1380 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1381 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1382 break; 1383 case MAT_ROW_ORIENTED: 1384 a->roworiented = flg; 1385 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1386 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1387 break; 1388 case MAT_NEW_DIAGONALS: 1389 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1390 break; 1391 case MAT_IGNORE_OFF_PROC_ENTRIES: 1392 a->donotstash = PETSC_TRUE; 1393 break; 1394 case MAT_SYMMETRIC: 1395 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1396 break; 1397 case MAT_STRUCTURALLY_SYMMETRIC: 1398 case MAT_HERMITIAN: 1399 case MAT_SYMMETRY_ETERNAL: 1400 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1401 break; 1402 default: 1403 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1404 } 1405 PetscFunctionReturn(0); 1406 } 1407 1408 #undef __FUNCT__ 1409 #define __FUNCT__ "MatGetRow_MPIAIJ" 1410 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1411 { 1412 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1413 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1414 PetscErrorCode ierr; 1415 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart; 1416 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend; 1417 PetscInt *cmap,*idx_p; 1418 1419 PetscFunctionBegin; 1420 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1421 mat->getrowactive = PETSC_TRUE; 1422 1423 if (!mat->rowvalues && (idx || v)) { 1424 /* 1425 allocate enough space to hold information from the longest row. 1426 */ 1427 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1428 PetscInt max = 1,tmp; 1429 for (i=0; i<matin->rmap.n; i++) { 1430 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1431 if (max < tmp) { max = tmp; } 1432 } 1433 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1434 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1435 } 1436 1437 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1438 lrow = row - rstart; 1439 1440 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1441 if (!v) {pvA = 0; pvB = 0;} 1442 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1443 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1444 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1445 nztot = nzA + nzB; 1446 1447 cmap = mat->garray; 1448 if (v || idx) { 1449 if (nztot) { 1450 /* Sort by increasing column numbers, assuming A and B already sorted */ 1451 PetscInt imark = -1; 1452 if (v) { 1453 *v = v_p = mat->rowvalues; 1454 for (i=0; i<nzB; i++) { 1455 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1456 else break; 1457 } 1458 imark = i; 1459 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1460 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1461 } 1462 if (idx) { 1463 *idx = idx_p = mat->rowindices; 1464 if (imark > -1) { 1465 for (i=0; i<imark; i++) { 1466 idx_p[i] = cmap[cworkB[i]]; 1467 } 1468 } else { 1469 for (i=0; i<nzB; i++) { 1470 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1471 else break; 1472 } 1473 imark = i; 1474 } 1475 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1476 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1477 } 1478 } else { 1479 if (idx) *idx = 0; 1480 if (v) *v = 0; 1481 } 1482 } 1483 *nz = nztot; 1484 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1485 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1486 PetscFunctionReturn(0); 1487 } 1488 1489 #undef __FUNCT__ 1490 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1491 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1492 { 1493 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1494 1495 PetscFunctionBegin; 1496 if (!aij->getrowactive) { 1497 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1498 } 1499 aij->getrowactive = PETSC_FALSE; 1500 PetscFunctionReturn(0); 1501 } 1502 1503 #undef __FUNCT__ 1504 #define __FUNCT__ "MatNorm_MPIAIJ" 1505 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1506 { 1507 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1508 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1509 PetscErrorCode ierr; 1510 PetscInt i,j,cstart = mat->cmap.rstart; 1511 PetscReal sum = 0.0; 1512 MatScalar *v; 1513 1514 PetscFunctionBegin; 1515 if (aij->size == 1) { 1516 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1517 } else { 1518 if (type == NORM_FROBENIUS) { 1519 v = amat->a; 1520 for (i=0; i<amat->nz; i++) { 1521 #if defined(PETSC_USE_COMPLEX) 1522 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1523 #else 1524 sum += (*v)*(*v); v++; 1525 #endif 1526 } 1527 v = bmat->a; 1528 for (i=0; i<bmat->nz; i++) { 1529 #if defined(PETSC_USE_COMPLEX) 1530 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1531 #else 1532 sum += (*v)*(*v); v++; 1533 #endif 1534 } 1535 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1536 *norm = sqrt(*norm); 1537 } else if (type == NORM_1) { /* max column norm */ 1538 PetscReal *tmp,*tmp2; 1539 PetscInt *jj,*garray = aij->garray; 1540 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1541 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1542 ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 1543 *norm = 0.0; 1544 v = amat->a; jj = amat->j; 1545 for (j=0; j<amat->nz; j++) { 1546 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1547 } 1548 v = bmat->a; jj = bmat->j; 1549 for (j=0; j<bmat->nz; j++) { 1550 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1551 } 1552 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1553 for (j=0; j<mat->cmap.N; j++) { 1554 if (tmp2[j] > *norm) *norm = tmp2[j]; 1555 } 1556 ierr = PetscFree(tmp);CHKERRQ(ierr); 1557 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1558 } else if (type == NORM_INFINITY) { /* max row norm */ 1559 PetscReal ntemp = 0.0; 1560 for (j=0; j<aij->A->rmap.n; j++) { 1561 v = amat->a + amat->i[j]; 1562 sum = 0.0; 1563 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1564 sum += PetscAbsScalar(*v); v++; 1565 } 1566 v = bmat->a + bmat->i[j]; 1567 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1568 sum += PetscAbsScalar(*v); v++; 1569 } 1570 if (sum > ntemp) ntemp = sum; 1571 } 1572 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1573 } else { 1574 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1575 } 1576 } 1577 PetscFunctionReturn(0); 1578 } 1579 1580 #undef __FUNCT__ 1581 #define __FUNCT__ "MatTranspose_MPIAIJ" 1582 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1583 { 1584 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1585 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1586 PetscErrorCode ierr; 1587 PetscInt M = A->rmap.N,N = A->cmap.N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,i,*d_nnz; 1588 PetscInt cstart=A->cmap.rstart,ncol; 1589 Mat B; 1590 MatScalar *array; 1591 1592 PetscFunctionBegin; 1593 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1594 1595 ma = A->rmap.n; na = A->cmap.n; mb = a->B->rmap.n; 1596 ai = Aloc->i; aj = Aloc->j; 1597 bi = Bloc->i; bj = Bloc->j; 1598 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1599 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1600 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1601 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1602 for (i=0; i<ai[ma]; i++){ 1603 d_nnz[aj[i]] ++; 1604 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1605 } 1606 1607 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1608 ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr); 1609 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1610 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1611 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1612 } else { 1613 B = *matout; 1614 } 1615 1616 /* copy over the A part */ 1617 array = Aloc->a; 1618 row = A->rmap.rstart; 1619 for (i=0; i<ma; i++) { 1620 ncol = ai[i+1]-ai[i]; 1621 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1622 row++; array += ncol; aj += ncol; 1623 } 1624 aj = Aloc->j; 1625 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1626 1627 /* copy over the B part */ 1628 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1629 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1630 array = Bloc->a; 1631 row = A->rmap.rstart; 1632 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1633 for (i=0; i<mb; i++) { 1634 ncol = bi[i+1]-bi[i]; 1635 ierr = MatSetValues(B,ncol,cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1636 row++; array += ncol; cols += ncol; 1637 } 1638 ierr = PetscFree(cols);CHKERRQ(ierr); 1639 1640 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1641 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1642 if (*matout != A) { 1643 *matout = B; 1644 } else { 1645 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1646 } 1647 PetscFunctionReturn(0); 1648 } 1649 1650 #undef __FUNCT__ 1651 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1652 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1653 { 1654 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1655 Mat a = aij->A,b = aij->B; 1656 PetscErrorCode ierr; 1657 PetscInt s1,s2,s3; 1658 1659 PetscFunctionBegin; 1660 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1661 if (rr) { 1662 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1663 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1664 /* Overlap communication with computation. */ 1665 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1666 } 1667 if (ll) { 1668 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1669 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1670 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1671 } 1672 /* scale the diagonal block */ 1673 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1674 1675 if (rr) { 1676 /* Do a scatter end and then right scale the off-diagonal block */ 1677 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1678 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1679 } 1680 1681 PetscFunctionReturn(0); 1682 } 1683 1684 #undef __FUNCT__ 1685 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1686 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1687 { 1688 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1689 PetscErrorCode ierr; 1690 1691 PetscFunctionBegin; 1692 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1693 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1694 PetscFunctionReturn(0); 1695 } 1696 #undef __FUNCT__ 1697 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1698 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1699 { 1700 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1701 PetscErrorCode ierr; 1702 1703 PetscFunctionBegin; 1704 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1705 PetscFunctionReturn(0); 1706 } 1707 1708 #undef __FUNCT__ 1709 #define __FUNCT__ "MatEqual_MPIAIJ" 1710 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1711 { 1712 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1713 Mat a,b,c,d; 1714 PetscTruth flg; 1715 PetscErrorCode ierr; 1716 1717 PetscFunctionBegin; 1718 a = matA->A; b = matA->B; 1719 c = matB->A; d = matB->B; 1720 1721 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1722 if (flg) { 1723 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1724 } 1725 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1726 PetscFunctionReturn(0); 1727 } 1728 1729 #undef __FUNCT__ 1730 #define __FUNCT__ "MatCopy_MPIAIJ" 1731 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1732 { 1733 PetscErrorCode ierr; 1734 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1735 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1736 1737 PetscFunctionBegin; 1738 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1739 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1740 /* because of the column compression in the off-processor part of the matrix a->B, 1741 the number of columns in a->B and b->B may be different, hence we cannot call 1742 the MatCopy() directly on the two parts. If need be, we can provide a more 1743 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1744 then copying the submatrices */ 1745 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1746 } else { 1747 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1748 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1749 } 1750 PetscFunctionReturn(0); 1751 } 1752 1753 #undef __FUNCT__ 1754 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1755 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1756 { 1757 PetscErrorCode ierr; 1758 1759 PetscFunctionBegin; 1760 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1761 PetscFunctionReturn(0); 1762 } 1763 1764 #include "petscblaslapack.h" 1765 #undef __FUNCT__ 1766 #define __FUNCT__ "MatAXPY_MPIAIJ" 1767 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1768 { 1769 PetscErrorCode ierr; 1770 PetscInt i; 1771 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1772 PetscBLASInt bnz,one=1; 1773 Mat_SeqAIJ *x,*y; 1774 1775 PetscFunctionBegin; 1776 if (str == SAME_NONZERO_PATTERN) { 1777 PetscScalar alpha = a; 1778 x = (Mat_SeqAIJ *)xx->A->data; 1779 y = (Mat_SeqAIJ *)yy->A->data; 1780 bnz = PetscBLASIntCast(x->nz); 1781 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1782 x = (Mat_SeqAIJ *)xx->B->data; 1783 y = (Mat_SeqAIJ *)yy->B->data; 1784 bnz = PetscBLASIntCast(x->nz); 1785 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1786 } else if (str == SUBSET_NONZERO_PATTERN) { 1787 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1788 1789 x = (Mat_SeqAIJ *)xx->B->data; 1790 y = (Mat_SeqAIJ *)yy->B->data; 1791 if (y->xtoy && y->XtoY != xx->B) { 1792 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1793 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1794 } 1795 if (!y->xtoy) { /* get xtoy */ 1796 ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1797 y->XtoY = xx->B; 1798 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1799 } 1800 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1801 } else { 1802 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1803 } 1804 PetscFunctionReturn(0); 1805 } 1806 1807 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1808 1809 #undef __FUNCT__ 1810 #define __FUNCT__ "MatConjugate_MPIAIJ" 1811 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1812 { 1813 #if defined(PETSC_USE_COMPLEX) 1814 PetscErrorCode ierr; 1815 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1816 1817 PetscFunctionBegin; 1818 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1819 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1820 #else 1821 PetscFunctionBegin; 1822 #endif 1823 PetscFunctionReturn(0); 1824 } 1825 1826 #undef __FUNCT__ 1827 #define __FUNCT__ "MatRealPart_MPIAIJ" 1828 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1829 { 1830 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1831 PetscErrorCode ierr; 1832 1833 PetscFunctionBegin; 1834 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1835 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1836 PetscFunctionReturn(0); 1837 } 1838 1839 #undef __FUNCT__ 1840 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1841 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1842 { 1843 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1844 PetscErrorCode ierr; 1845 1846 PetscFunctionBegin; 1847 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1848 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1849 PetscFunctionReturn(0); 1850 } 1851 1852 #ifdef PETSC_HAVE_PBGL 1853 1854 #include <boost/parallel/mpi/bsp_process_group.hpp> 1855 #include <boost/graph/distributed/ilu_default_graph.hpp> 1856 #include <boost/graph/distributed/ilu_0_block.hpp> 1857 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1858 #include <boost/graph/distributed/petsc/interface.hpp> 1859 #include <boost/multi_array.hpp> 1860 #include <boost/parallel/distributed_property_map.hpp> 1861 1862 #undef __FUNCT__ 1863 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1864 /* 1865 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1866 */ 1867 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact) 1868 { 1869 namespace petsc = boost::distributed::petsc; 1870 1871 namespace graph_dist = boost::graph::distributed; 1872 using boost::graph::distributed::ilu_default::process_group_type; 1873 using boost::graph::ilu_permuted; 1874 1875 PetscTruth row_identity, col_identity; 1876 PetscContainer c; 1877 PetscInt m, n, M, N; 1878 PetscErrorCode ierr; 1879 1880 PetscFunctionBegin; 1881 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1882 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1883 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1884 if (!row_identity || !col_identity) { 1885 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1886 } 1887 1888 process_group_type pg; 1889 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1890 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1891 lgraph_type& level_graph = *lgraph_p; 1892 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1893 1894 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1895 ilu_permuted(level_graph); 1896 1897 /* put together the new matrix */ 1898 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 1899 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1900 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1901 ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr); 1902 ierr = MatSetType(*fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 1903 ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1904 ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1905 (*fact)->factor = FACTOR_LU; 1906 1907 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 1908 ierr = PetscContainerSetPointer(c, lgraph_p); 1909 ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c); 1910 PetscFunctionReturn(0); 1911 } 1912 1913 #undef __FUNCT__ 1914 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1915 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B) 1916 { 1917 PetscFunctionBegin; 1918 PetscFunctionReturn(0); 1919 } 1920 1921 #undef __FUNCT__ 1922 #define __FUNCT__ "MatSolve_MPIAIJ" 1923 /* 1924 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1925 */ 1926 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1927 { 1928 namespace graph_dist = boost::graph::distributed; 1929 1930 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1931 lgraph_type* lgraph_p; 1932 PetscContainer c; 1933 PetscErrorCode ierr; 1934 1935 PetscFunctionBegin; 1936 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1937 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1938 ierr = VecCopy(b, x); CHKERRQ(ierr); 1939 1940 PetscScalar* array_x; 1941 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1942 PetscInt sx; 1943 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 1944 1945 PetscScalar* array_b; 1946 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 1947 PetscInt sb; 1948 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 1949 1950 lgraph_type& level_graph = *lgraph_p; 1951 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1952 1953 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 1954 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 1955 ref_x(array_x, boost::extents[num_vertices(graph)]); 1956 1957 typedef boost::iterator_property_map<array_ref_type::iterator, 1958 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 1959 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 1960 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 1961 1962 ilu_set_solve(*lgraph_p, vector_b, vector_x); 1963 1964 PetscFunctionReturn(0); 1965 } 1966 #endif 1967 1968 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 1969 PetscInt nzlocal,nsends,nrecvs; 1970 PetscMPIInt *send_rank; 1971 PetscInt *sbuf_nz,*sbuf_j,**rbuf_j; 1972 PetscScalar *sbuf_a,**rbuf_a; 1973 PetscErrorCode (*MatDestroy)(Mat); 1974 } Mat_Redundant; 1975 1976 #undef __FUNCT__ 1977 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 1978 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 1979 { 1980 PetscErrorCode ierr; 1981 Mat_Redundant *redund=(Mat_Redundant*)ptr; 1982 PetscInt i; 1983 1984 PetscFunctionBegin; 1985 ierr = PetscFree(redund->send_rank);CHKERRQ(ierr); 1986 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 1987 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 1988 for (i=0; i<redund->nrecvs; i++){ 1989 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 1990 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 1991 } 1992 ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 1993 ierr = PetscFree(redund);CHKERRQ(ierr); 1994 PetscFunctionReturn(0); 1995 } 1996 1997 #undef __FUNCT__ 1998 #define __FUNCT__ "MatDestroy_MatRedundant" 1999 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2000 { 2001 PetscErrorCode ierr; 2002 PetscContainer container; 2003 Mat_Redundant *redund=PETSC_NULL; 2004 2005 PetscFunctionBegin; 2006 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2007 if (container) { 2008 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2009 } else { 2010 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2011 } 2012 A->ops->destroy = redund->MatDestroy; 2013 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2014 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2015 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 2016 PetscFunctionReturn(0); 2017 } 2018 2019 #undef __FUNCT__ 2020 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2021 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2022 { 2023 PetscMPIInt rank,size; 2024 MPI_Comm comm=((PetscObject)mat)->comm; 2025 PetscErrorCode ierr; 2026 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2027 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2028 PetscInt *rowrange=mat->rmap.range; 2029 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2030 Mat A=aij->A,B=aij->B,C=*matredundant; 2031 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2032 PetscScalar *sbuf_a; 2033 PetscInt nzlocal=a->nz+b->nz; 2034 PetscInt j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2035 PetscInt rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N; 2036 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2037 MatScalar *aworkA,*aworkB; 2038 PetscScalar *vals; 2039 PetscMPIInt tag1,tag2,tag3,imdex; 2040 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2041 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2042 MPI_Status recv_status,*send_status; 2043 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2044 PetscInt **rbuf_j=PETSC_NULL; 2045 PetscScalar **rbuf_a=PETSC_NULL; 2046 Mat_Redundant *redund=PETSC_NULL; 2047 PetscContainer container; 2048 2049 PetscFunctionBegin; 2050 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2051 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2052 2053 if (reuse == MAT_REUSE_MATRIX) { 2054 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2055 if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2056 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2057 if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2058 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2059 if (container) { 2060 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2061 } else { 2062 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2063 } 2064 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2065 2066 nsends = redund->nsends; 2067 nrecvs = redund->nrecvs; 2068 send_rank = redund->send_rank; recv_rank = send_rank + size; 2069 sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends; 2070 sbuf_j = redund->sbuf_j; 2071 sbuf_a = redund->sbuf_a; 2072 rbuf_j = redund->rbuf_j; 2073 rbuf_a = redund->rbuf_a; 2074 } 2075 2076 if (reuse == MAT_INITIAL_MATRIX){ 2077 PetscMPIInt subrank,subsize; 2078 PetscInt nleftover,np_subcomm; 2079 /* get the destination processors' id send_rank, nsends and nrecvs */ 2080 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2081 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2082 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 2083 recv_rank = send_rank + size; 2084 np_subcomm = size/nsubcomm; 2085 nleftover = size - nsubcomm*np_subcomm; 2086 nsends = 0; nrecvs = 0; 2087 for (i=0; i<size; i++){ /* i=rank*/ 2088 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2089 send_rank[nsends] = i; nsends++; 2090 recv_rank[nrecvs++] = i; 2091 } 2092 } 2093 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2094 i = size-nleftover-1; 2095 j = 0; 2096 while (j < nsubcomm - nleftover){ 2097 send_rank[nsends++] = i; 2098 i--; j++; 2099 } 2100 } 2101 2102 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2103 for (i=0; i<nleftover; i++){ 2104 recv_rank[nrecvs++] = size-nleftover+i; 2105 } 2106 } 2107 2108 /* allocate sbuf_j, sbuf_a */ 2109 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2110 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2111 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2112 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2113 2114 /* copy mat's local entries into the buffers */ 2115 if (reuse == MAT_INITIAL_MATRIX){ 2116 rownz_max = 0; 2117 rptr = sbuf_j; 2118 cols = sbuf_j + rend-rstart + 1; 2119 vals = sbuf_a; 2120 rptr[0] = 0; 2121 for (i=0; i<rend-rstart; i++){ 2122 row = i + rstart; 2123 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2124 ncols = nzA + nzB; 2125 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2126 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2127 /* load the column indices for this row into cols */ 2128 lwrite = 0; 2129 for (l=0; l<nzB; l++) { 2130 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2131 vals[lwrite] = aworkB[l]; 2132 cols[lwrite++] = ctmp; 2133 } 2134 } 2135 for (l=0; l<nzA; l++){ 2136 vals[lwrite] = aworkA[l]; 2137 cols[lwrite++] = cstart + cworkA[l]; 2138 } 2139 for (l=0; l<nzB; l++) { 2140 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2141 vals[lwrite] = aworkB[l]; 2142 cols[lwrite++] = ctmp; 2143 } 2144 } 2145 vals += ncols; 2146 cols += ncols; 2147 rptr[i+1] = rptr[i] + ncols; 2148 if (rownz_max < ncols) rownz_max = ncols; 2149 } 2150 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2151 } else { /* only copy matrix values into sbuf_a */ 2152 rptr = sbuf_j; 2153 vals = sbuf_a; 2154 rptr[0] = 0; 2155 for (i=0; i<rend-rstart; i++){ 2156 row = i + rstart; 2157 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2158 ncols = nzA + nzB; 2159 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2160 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2161 lwrite = 0; 2162 for (l=0; l<nzB; l++) { 2163 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2164 } 2165 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2166 for (l=0; l<nzB; l++) { 2167 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2168 } 2169 vals += ncols; 2170 rptr[i+1] = rptr[i] + ncols; 2171 } 2172 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2173 2174 /* send nzlocal to others, and recv other's nzlocal */ 2175 /*--------------------------------------------------*/ 2176 if (reuse == MAT_INITIAL_MATRIX){ 2177 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2178 s_waits2 = s_waits3 + nsends; 2179 s_waits1 = s_waits2 + nsends; 2180 r_waits1 = s_waits1 + nsends; 2181 r_waits2 = r_waits1 + nrecvs; 2182 r_waits3 = r_waits2 + nrecvs; 2183 } else { 2184 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2185 r_waits3 = s_waits3 + nsends; 2186 } 2187 2188 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2189 if (reuse == MAT_INITIAL_MATRIX){ 2190 /* get new tags to keep the communication clean */ 2191 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2192 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2193 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2194 rbuf_nz = sbuf_nz + nsends; 2195 2196 /* post receives of other's nzlocal */ 2197 for (i=0; i<nrecvs; i++){ 2198 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2199 } 2200 /* send nzlocal to others */ 2201 for (i=0; i<nsends; i++){ 2202 sbuf_nz[i] = nzlocal; 2203 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2204 } 2205 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2206 count = nrecvs; 2207 while (count) { 2208 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2209 recv_rank[imdex] = recv_status.MPI_SOURCE; 2210 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2211 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2212 2213 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2214 rbuf_nz[imdex] += i + 2; 2215 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2216 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2217 count--; 2218 } 2219 /* wait on sends of nzlocal */ 2220 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2221 /* send mat->i,j to others, and recv from other's */ 2222 /*------------------------------------------------*/ 2223 for (i=0; i<nsends; i++){ 2224 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2225 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2226 } 2227 /* wait on receives of mat->i,j */ 2228 /*------------------------------*/ 2229 count = nrecvs; 2230 while (count) { 2231 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2232 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2233 count--; 2234 } 2235 /* wait on sends of mat->i,j */ 2236 /*---------------------------*/ 2237 if (nsends) { 2238 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2239 } 2240 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2241 2242 /* post receives, send and receive mat->a */ 2243 /*----------------------------------------*/ 2244 for (imdex=0; imdex<nrecvs; imdex++) { 2245 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2246 } 2247 for (i=0; i<nsends; i++){ 2248 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2249 } 2250 count = nrecvs; 2251 while (count) { 2252 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2253 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2254 count--; 2255 } 2256 if (nsends) { 2257 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2258 } 2259 2260 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2261 2262 /* create redundant matrix */ 2263 /*-------------------------*/ 2264 if (reuse == MAT_INITIAL_MATRIX){ 2265 /* compute rownz_max for preallocation */ 2266 for (imdex=0; imdex<nrecvs; imdex++){ 2267 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2268 rptr = rbuf_j[imdex]; 2269 for (i=0; i<j; i++){ 2270 ncols = rptr[i+1] - rptr[i]; 2271 if (rownz_max < ncols) rownz_max = ncols; 2272 } 2273 } 2274 2275 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2276 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2277 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2278 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2279 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2280 } else { 2281 C = *matredundant; 2282 } 2283 2284 /* insert local matrix entries */ 2285 rptr = sbuf_j; 2286 cols = sbuf_j + rend-rstart + 1; 2287 vals = sbuf_a; 2288 for (i=0; i<rend-rstart; i++){ 2289 row = i + rstart; 2290 ncols = rptr[i+1] - rptr[i]; 2291 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2292 vals += ncols; 2293 cols += ncols; 2294 } 2295 /* insert received matrix entries */ 2296 for (imdex=0; imdex<nrecvs; imdex++){ 2297 rstart = rowrange[recv_rank[imdex]]; 2298 rend = rowrange[recv_rank[imdex]+1]; 2299 rptr = rbuf_j[imdex]; 2300 cols = rbuf_j[imdex] + rend-rstart + 1; 2301 vals = rbuf_a[imdex]; 2302 for (i=0; i<rend-rstart; i++){ 2303 row = i + rstart; 2304 ncols = rptr[i+1] - rptr[i]; 2305 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2306 vals += ncols; 2307 cols += ncols; 2308 } 2309 } 2310 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2311 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2312 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2313 if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N); 2314 if (reuse == MAT_INITIAL_MATRIX){ 2315 PetscContainer container; 2316 *matredundant = C; 2317 /* create a supporting struct and attach it to C for reuse */ 2318 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2319 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2320 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2321 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2322 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2323 2324 redund->nzlocal = nzlocal; 2325 redund->nsends = nsends; 2326 redund->nrecvs = nrecvs; 2327 redund->send_rank = send_rank; 2328 redund->sbuf_nz = sbuf_nz; 2329 redund->sbuf_j = sbuf_j; 2330 redund->sbuf_a = sbuf_a; 2331 redund->rbuf_j = rbuf_j; 2332 redund->rbuf_a = rbuf_a; 2333 2334 redund->MatDestroy = C->ops->destroy; 2335 C->ops->destroy = MatDestroy_MatRedundant; 2336 } 2337 PetscFunctionReturn(0); 2338 } 2339 2340 #undef __FUNCT__ 2341 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2342 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2343 { 2344 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2345 PetscInt n = A->rmap.n; 2346 PetscInt cstart = A->cmap.rstart; 2347 PetscInt *cmap = mat->garray; 2348 PetscInt *diagIdx, *offdiagIdx; 2349 Vec diagV, offdiagV; 2350 PetscScalar *a, *diagA, *offdiagA; 2351 PetscInt r; 2352 PetscErrorCode ierr; 2353 2354 PetscFunctionBegin; 2355 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2356 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2357 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2358 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2359 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2360 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2361 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2362 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2363 for(r = 0; r < n; ++r) { 2364 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2365 a[r] = diagA[r]; 2366 idx[r] = cstart + diagIdx[r]; 2367 } else { 2368 a[r] = offdiagA[r]; 2369 idx[r] = cmap[offdiagIdx[r]]; 2370 } 2371 } 2372 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2373 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2374 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2375 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2376 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2377 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2378 PetscFunctionReturn(0); 2379 } 2380 2381 #undef __FUNCT__ 2382 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2383 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[]) 2384 { 2385 PetscErrorCode ierr; 2386 2387 PetscFunctionBegin; 2388 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 2389 PetscFunctionReturn(0); 2390 } 2391 2392 /* -------------------------------------------------------------------*/ 2393 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2394 MatGetRow_MPIAIJ, 2395 MatRestoreRow_MPIAIJ, 2396 MatMult_MPIAIJ, 2397 /* 4*/ MatMultAdd_MPIAIJ, 2398 MatMultTranspose_MPIAIJ, 2399 MatMultTransposeAdd_MPIAIJ, 2400 #ifdef PETSC_HAVE_PBGL 2401 MatSolve_MPIAIJ, 2402 #else 2403 0, 2404 #endif 2405 0, 2406 0, 2407 /*10*/ 0, 2408 0, 2409 0, 2410 MatRelax_MPIAIJ, 2411 MatTranspose_MPIAIJ, 2412 /*15*/ MatGetInfo_MPIAIJ, 2413 MatEqual_MPIAIJ, 2414 MatGetDiagonal_MPIAIJ, 2415 MatDiagonalScale_MPIAIJ, 2416 MatNorm_MPIAIJ, 2417 /*20*/ MatAssemblyBegin_MPIAIJ, 2418 MatAssemblyEnd_MPIAIJ, 2419 0, 2420 MatSetOption_MPIAIJ, 2421 MatZeroEntries_MPIAIJ, 2422 /*25*/ MatZeroRows_MPIAIJ, 2423 0, 2424 #ifdef PETSC_HAVE_PBGL 2425 MatLUFactorNumeric_MPIAIJ, 2426 #else 2427 0, 2428 #endif 2429 0, 2430 0, 2431 /*30*/ MatSetUpPreallocation_MPIAIJ, 2432 #ifdef PETSC_HAVE_PBGL 2433 MatILUFactorSymbolic_MPIAIJ, 2434 #else 2435 0, 2436 #endif 2437 0, 2438 0, 2439 0, 2440 /*35*/ MatDuplicate_MPIAIJ, 2441 0, 2442 0, 2443 0, 2444 0, 2445 /*40*/ MatAXPY_MPIAIJ, 2446 MatGetSubMatrices_MPIAIJ, 2447 MatIncreaseOverlap_MPIAIJ, 2448 MatGetValues_MPIAIJ, 2449 MatCopy_MPIAIJ, 2450 /*45*/ 0, 2451 MatScale_MPIAIJ, 2452 0, 2453 0, 2454 0, 2455 /*50*/ MatSetBlockSize_MPIAIJ, 2456 0, 2457 0, 2458 0, 2459 0, 2460 /*55*/ MatFDColoringCreate_MPIAIJ, 2461 0, 2462 MatSetUnfactored_MPIAIJ, 2463 MatPermute_MPIAIJ, 2464 0, 2465 /*60*/ MatGetSubMatrix_MPIAIJ, 2466 MatDestroy_MPIAIJ, 2467 MatView_MPIAIJ, 2468 0, 2469 0, 2470 /*65*/ 0, 2471 0, 2472 0, 2473 0, 2474 0, 2475 /*70*/ 0, 2476 0, 2477 MatSetColoring_MPIAIJ, 2478 #if defined(PETSC_HAVE_ADIC) 2479 MatSetValuesAdic_MPIAIJ, 2480 #else 2481 0, 2482 #endif 2483 MatSetValuesAdifor_MPIAIJ, 2484 /*75*/ 0, 2485 0, 2486 0, 2487 0, 2488 0, 2489 /*80*/ 0, 2490 0, 2491 0, 2492 0, 2493 /*84*/ MatLoad_MPIAIJ, 2494 0, 2495 0, 2496 0, 2497 0, 2498 0, 2499 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 2500 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2501 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2502 MatPtAP_Basic, 2503 MatPtAPSymbolic_MPIAIJ, 2504 /*95*/ MatPtAPNumeric_MPIAIJ, 2505 0, 2506 0, 2507 0, 2508 0, 2509 /*100*/0, 2510 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2511 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2512 MatConjugate_MPIAIJ, 2513 0, 2514 /*105*/MatSetValuesRow_MPIAIJ, 2515 MatRealPart_MPIAIJ, 2516 MatImaginaryPart_MPIAIJ, 2517 0, 2518 0, 2519 /*110*/0, 2520 MatGetRedundantMatrix_MPIAIJ, 2521 MatGetRowMin_MPIAIJ, 2522 0, 2523 0, 2524 /*115*/MatGetSeqNonzerostructure_MPIAIJ}; 2525 2526 /* ----------------------------------------------------------------------------------------*/ 2527 2528 EXTERN_C_BEGIN 2529 #undef __FUNCT__ 2530 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2531 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2532 { 2533 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2534 PetscErrorCode ierr; 2535 2536 PetscFunctionBegin; 2537 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2538 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2539 PetscFunctionReturn(0); 2540 } 2541 EXTERN_C_END 2542 2543 EXTERN_C_BEGIN 2544 #undef __FUNCT__ 2545 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2546 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2547 { 2548 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2549 PetscErrorCode ierr; 2550 2551 PetscFunctionBegin; 2552 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2553 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2554 PetscFunctionReturn(0); 2555 } 2556 EXTERN_C_END 2557 2558 #include "petscpc.h" 2559 EXTERN_C_BEGIN 2560 #undef __FUNCT__ 2561 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2562 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2563 { 2564 Mat_MPIAIJ *b; 2565 PetscErrorCode ierr; 2566 PetscInt i; 2567 2568 PetscFunctionBegin; 2569 B->preallocated = PETSC_TRUE; 2570 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2571 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2572 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2573 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2574 2575 B->rmap.bs = B->cmap.bs = 1; 2576 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2577 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2578 if (d_nnz) { 2579 for (i=0; i<B->rmap.n; i++) { 2580 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2581 } 2582 } 2583 if (o_nnz) { 2584 for (i=0; i<B->rmap.n; i++) { 2585 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2586 } 2587 } 2588 b = (Mat_MPIAIJ*)B->data; 2589 2590 /* Explicitly create 2 MATSEQAIJ matrices. */ 2591 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2592 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 2593 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2594 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2595 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2596 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 2597 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2598 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2599 2600 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2601 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2602 2603 PetscFunctionReturn(0); 2604 } 2605 EXTERN_C_END 2606 2607 #undef __FUNCT__ 2608 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2609 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2610 { 2611 Mat mat; 2612 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2613 PetscErrorCode ierr; 2614 2615 PetscFunctionBegin; 2616 *newmat = 0; 2617 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2618 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 2619 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2620 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2621 a = (Mat_MPIAIJ*)mat->data; 2622 2623 mat->factor = matin->factor; 2624 mat->rmap.bs = matin->rmap.bs; 2625 mat->assembled = PETSC_TRUE; 2626 mat->insertmode = NOT_SET_VALUES; 2627 mat->preallocated = PETSC_TRUE; 2628 2629 a->size = oldmat->size; 2630 a->rank = oldmat->rank; 2631 a->donotstash = oldmat->donotstash; 2632 a->roworiented = oldmat->roworiented; 2633 a->rowindices = 0; 2634 a->rowvalues = 0; 2635 a->getrowactive = PETSC_FALSE; 2636 2637 ierr = PetscMapCopy(((PetscObject)mat)->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 2638 ierr = PetscMapCopy(((PetscObject)mat)->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 2639 2640 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr); 2641 if (oldmat->colmap) { 2642 #if defined (PETSC_USE_CTABLE) 2643 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2644 #else 2645 ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2646 ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2647 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2648 #endif 2649 } else a->colmap = 0; 2650 if (oldmat->garray) { 2651 PetscInt len; 2652 len = oldmat->B->cmap.n; 2653 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2654 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2655 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2656 } else a->garray = 0; 2657 2658 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2659 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2660 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2661 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2662 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2663 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2664 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2665 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2666 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2667 *newmat = mat; 2668 PetscFunctionReturn(0); 2669 } 2670 2671 #include "petscsys.h" 2672 2673 #undef __FUNCT__ 2674 #define __FUNCT__ "MatLoad_MPIAIJ" 2675 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat) 2676 { 2677 Mat A; 2678 PetscScalar *vals,*svals; 2679 MPI_Comm comm = ((PetscObject)viewer)->comm; 2680 MPI_Status status; 2681 PetscErrorCode ierr; 2682 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 2683 PetscInt i,nz,j,rstart,rend,mmax; 2684 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2685 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2686 PetscInt cend,cstart,n,*rowners; 2687 int fd; 2688 2689 PetscFunctionBegin; 2690 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2691 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2692 if (!rank) { 2693 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2694 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2695 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2696 } 2697 2698 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2699 M = header[1]; N = header[2]; 2700 /* determine ownership of all rows */ 2701 m = M/size + ((M % size) > rank); 2702 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2703 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2704 2705 /* First process needs enough room for process with most rows */ 2706 if (!rank) { 2707 mmax = rowners[1]; 2708 for (i=2; i<size; i++) { 2709 mmax = PetscMax(mmax,rowners[i]); 2710 } 2711 } else mmax = m; 2712 2713 rowners[0] = 0; 2714 for (i=2; i<=size; i++) { 2715 rowners[i] += rowners[i-1]; 2716 } 2717 rstart = rowners[rank]; 2718 rend = rowners[rank+1]; 2719 2720 /* distribute row lengths to all processors */ 2721 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2722 if (!rank) { 2723 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2724 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2725 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2726 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2727 for (j=0; j<m; j++) { 2728 procsnz[0] += ourlens[j]; 2729 } 2730 for (i=1; i<size; i++) { 2731 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2732 /* calculate the number of nonzeros on each processor */ 2733 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2734 procsnz[i] += rowlengths[j]; 2735 } 2736 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2737 } 2738 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2739 } else { 2740 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2741 } 2742 2743 if (!rank) { 2744 /* determine max buffer needed and allocate it */ 2745 maxnz = 0; 2746 for (i=0; i<size; i++) { 2747 maxnz = PetscMax(maxnz,procsnz[i]); 2748 } 2749 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2750 2751 /* read in my part of the matrix column indices */ 2752 nz = procsnz[0]; 2753 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2754 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2755 2756 /* read in every one elses and ship off */ 2757 for (i=1; i<size; i++) { 2758 nz = procsnz[i]; 2759 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2760 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2761 } 2762 ierr = PetscFree(cols);CHKERRQ(ierr); 2763 } else { 2764 /* determine buffer space needed for message */ 2765 nz = 0; 2766 for (i=0; i<m; i++) { 2767 nz += ourlens[i]; 2768 } 2769 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2770 2771 /* receive message of column indices*/ 2772 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2773 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2774 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2775 } 2776 2777 /* determine column ownership if matrix is not square */ 2778 if (N != M) { 2779 n = N/size + ((N % size) > rank); 2780 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2781 cstart = cend - n; 2782 } else { 2783 cstart = rstart; 2784 cend = rend; 2785 n = cend - cstart; 2786 } 2787 2788 /* loop over local rows, determining number of off diagonal entries */ 2789 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2790 jj = 0; 2791 for (i=0; i<m; i++) { 2792 for (j=0; j<ourlens[i]; j++) { 2793 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2794 jj++; 2795 } 2796 } 2797 2798 /* create our matrix */ 2799 for (i=0; i<m; i++) { 2800 ourlens[i] -= offlens[i]; 2801 } 2802 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2803 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2804 ierr = MatSetType(A,type);CHKERRQ(ierr); 2805 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2806 2807 for (i=0; i<m; i++) { 2808 ourlens[i] += offlens[i]; 2809 } 2810 2811 if (!rank) { 2812 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2813 2814 /* read in my part of the matrix numerical values */ 2815 nz = procsnz[0]; 2816 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2817 2818 /* insert into matrix */ 2819 jj = rstart; 2820 smycols = mycols; 2821 svals = vals; 2822 for (i=0; i<m; i++) { 2823 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2824 smycols += ourlens[i]; 2825 svals += ourlens[i]; 2826 jj++; 2827 } 2828 2829 /* read in other processors and ship out */ 2830 for (i=1; i<size; i++) { 2831 nz = procsnz[i]; 2832 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2833 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 2834 } 2835 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2836 } else { 2837 /* receive numeric values */ 2838 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2839 2840 /* receive message of values*/ 2841 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 2842 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2843 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2844 2845 /* insert into matrix */ 2846 jj = rstart; 2847 smycols = mycols; 2848 svals = vals; 2849 for (i=0; i<m; i++) { 2850 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2851 smycols += ourlens[i]; 2852 svals += ourlens[i]; 2853 jj++; 2854 } 2855 } 2856 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2857 ierr = PetscFree(vals);CHKERRQ(ierr); 2858 ierr = PetscFree(mycols);CHKERRQ(ierr); 2859 ierr = PetscFree(rowners);CHKERRQ(ierr); 2860 2861 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2862 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2863 *newmat = A; 2864 PetscFunctionReturn(0); 2865 } 2866 2867 #undef __FUNCT__ 2868 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2869 /* 2870 Not great since it makes two copies of the submatrix, first an SeqAIJ 2871 in local and then by concatenating the local matrices the end result. 2872 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2873 */ 2874 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2875 { 2876 PetscErrorCode ierr; 2877 PetscMPIInt rank,size; 2878 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2879 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2880 Mat *local,M,Mreuse; 2881 MatScalar *vwork,*aa; 2882 MPI_Comm comm = ((PetscObject)mat)->comm; 2883 Mat_SeqAIJ *aij; 2884 2885 2886 PetscFunctionBegin; 2887 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2888 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2889 2890 if (call == MAT_REUSE_MATRIX) { 2891 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2892 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2893 local = &Mreuse; 2894 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2895 } else { 2896 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2897 Mreuse = *local; 2898 ierr = PetscFree(local);CHKERRQ(ierr); 2899 } 2900 2901 /* 2902 m - number of local rows 2903 n - number of columns (same on all processors) 2904 rstart - first row in new global matrix generated 2905 */ 2906 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2907 if (call == MAT_INITIAL_MATRIX) { 2908 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2909 ii = aij->i; 2910 jj = aij->j; 2911 2912 /* 2913 Determine the number of non-zeros in the diagonal and off-diagonal 2914 portions of the matrix in order to do correct preallocation 2915 */ 2916 2917 /* first get start and end of "diagonal" columns */ 2918 if (csize == PETSC_DECIDE) { 2919 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2920 if (mglobal == n) { /* square matrix */ 2921 nlocal = m; 2922 } else { 2923 nlocal = n/size + ((n % size) > rank); 2924 } 2925 } else { 2926 nlocal = csize; 2927 } 2928 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2929 rstart = rend - nlocal; 2930 if (rank == size - 1 && rend != n) { 2931 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2932 } 2933 2934 /* next, compute all the lengths */ 2935 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2936 olens = dlens + m; 2937 for (i=0; i<m; i++) { 2938 jend = ii[i+1] - ii[i]; 2939 olen = 0; 2940 dlen = 0; 2941 for (j=0; j<jend; j++) { 2942 if (*jj < rstart || *jj >= rend) olen++; 2943 else dlen++; 2944 jj++; 2945 } 2946 olens[i] = olen; 2947 dlens[i] = dlen; 2948 } 2949 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2950 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 2951 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2952 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2953 ierr = PetscFree(dlens);CHKERRQ(ierr); 2954 } else { 2955 PetscInt ml,nl; 2956 2957 M = *newmat; 2958 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2959 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2960 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2961 /* 2962 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2963 rather than the slower MatSetValues(). 2964 */ 2965 M->was_assembled = PETSC_TRUE; 2966 M->assembled = PETSC_FALSE; 2967 } 2968 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2969 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2970 ii = aij->i; 2971 jj = aij->j; 2972 aa = aij->a; 2973 for (i=0; i<m; i++) { 2974 row = rstart + i; 2975 nz = ii[i+1] - ii[i]; 2976 cwork = jj; jj += nz; 2977 vwork = aa; aa += nz; 2978 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2979 } 2980 2981 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2982 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2983 *newmat = M; 2984 2985 /* save submatrix used in processor for next request */ 2986 if (call == MAT_INITIAL_MATRIX) { 2987 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2988 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2989 } 2990 2991 PetscFunctionReturn(0); 2992 } 2993 2994 EXTERN_C_BEGIN 2995 #undef __FUNCT__ 2996 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2997 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 2998 { 2999 PetscInt m,cstart, cend,j,nnz,i,d; 3000 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3001 const PetscInt *JJ; 3002 PetscScalar *values; 3003 PetscErrorCode ierr; 3004 3005 PetscFunctionBegin; 3006 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3007 3008 B->rmap.bs = B->cmap.bs = 1; 3009 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 3010 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 3011 m = B->rmap.n; 3012 cstart = B->cmap.rstart; 3013 cend = B->cmap.rend; 3014 rstart = B->rmap.rstart; 3015 3016 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 3017 o_nnz = d_nnz + m; 3018 3019 #if defined(PETSC_USE_DEBUGGING) 3020 for (i=0; i<m; i++) { 3021 nnz = Ii[i+1]- Ii[i]; 3022 JJ = J + Ii[i]; 3023 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3024 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3025 if (nnz && (JJ[nnz-1] >= B->cmap.N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap.N); 3026 for (j=1; j<nnz; j++) { 3027 if (JJ[i] <= JJ[i-1]) SETERRRQ(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at %D location in column indices",i,j); 3028 } 3029 } 3030 #endif 3031 3032 for (i=0; i<m; i++) { 3033 nnz = Ii[i+1]- Ii[i]; 3034 JJ = J + Ii[i]; 3035 nnz_max = PetscMax(nnz_max,nnz); 3036 for (j=0; j<nnz; j++) { 3037 if (*JJ >= cstart) break; 3038 JJ++; 3039 } 3040 d = 0; 3041 for (; j<nnz; j++) { 3042 if (*JJ++ >= cend) break; 3043 d++; 3044 } 3045 d_nnz[i] = d; 3046 o_nnz[i] = nnz - d; 3047 } 3048 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3049 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 3050 3051 if (v) values = (PetscScalar*)v; 3052 else { 3053 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3054 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3055 } 3056 3057 for (i=0; i<m; i++) { 3058 ii = i + rstart; 3059 nnz = Ii[i+1]- Ii[i]; 3060 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3061 } 3062 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3063 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3064 3065 if (!v) { 3066 ierr = PetscFree(values);CHKERRQ(ierr); 3067 } 3068 PetscFunctionReturn(0); 3069 } 3070 EXTERN_C_END 3071 3072 #undef __FUNCT__ 3073 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3074 /*@ 3075 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3076 (the default parallel PETSc format). 3077 3078 Collective on MPI_Comm 3079 3080 Input Parameters: 3081 + B - the matrix 3082 . i - the indices into j for the start of each local row (starts with zero) 3083 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3084 - v - optional values in the matrix 3085 3086 Level: developer 3087 3088 Notes: 3089 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3090 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3091 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3092 3093 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3094 3095 The format which is used for the sparse matrix input, is equivalent to a 3096 row-major ordering.. i.e for the following matrix, the input data expected is 3097 as shown: 3098 3099 1 0 0 3100 2 0 3 P0 3101 ------- 3102 4 5 6 P1 3103 3104 Process0 [P0]: rows_owned=[0,1] 3105 i = {0,1,3} [size = nrow+1 = 2+1] 3106 j = {0,0,2} [size = nz = 6] 3107 v = {1,2,3} [size = nz = 6] 3108 3109 Process1 [P1]: rows_owned=[2] 3110 i = {0,3} [size = nrow+1 = 1+1] 3111 j = {0,1,2} [size = nz = 6] 3112 v = {4,5,6} [size = nz = 6] 3113 3114 The column indices for each row MUST be sorted. 3115 3116 .keywords: matrix, aij, compressed row, sparse, parallel 3117 3118 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3119 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3120 @*/ 3121 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3122 { 3123 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 3124 3125 PetscFunctionBegin; 3126 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3127 if (f) { 3128 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 3129 } 3130 PetscFunctionReturn(0); 3131 } 3132 3133 #undef __FUNCT__ 3134 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3135 /*@C 3136 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3137 (the default parallel PETSc format). For good matrix assembly performance 3138 the user should preallocate the matrix storage by setting the parameters 3139 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3140 performance can be increased by more than a factor of 50. 3141 3142 Collective on MPI_Comm 3143 3144 Input Parameters: 3145 + A - the matrix 3146 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3147 (same value is used for all local rows) 3148 . d_nnz - array containing the number of nonzeros in the various rows of the 3149 DIAGONAL portion of the local submatrix (possibly different for each row) 3150 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3151 The size of this array is equal to the number of local rows, i.e 'm'. 3152 You must leave room for the diagonal entry even if it is zero. 3153 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3154 submatrix (same value is used for all local rows). 3155 - o_nnz - array containing the number of nonzeros in the various rows of the 3156 OFF-DIAGONAL portion of the local submatrix (possibly different for 3157 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3158 structure. The size of this array is equal to the number 3159 of local rows, i.e 'm'. 3160 3161 If the *_nnz parameter is given then the *_nz parameter is ignored 3162 3163 The AIJ format (also called the Yale sparse matrix format or 3164 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3165 storage. The stored row and column indices begin with zero. See the users manual for details. 3166 3167 The parallel matrix is partitioned such that the first m0 rows belong to 3168 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3169 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3170 3171 The DIAGONAL portion of the local submatrix of a processor can be defined 3172 as the submatrix which is obtained by extraction the part corresponding 3173 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 3174 first row that belongs to the processor, and r2 is the last row belonging 3175 to the this processor. This is a square mxm matrix. The remaining portion 3176 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 3177 3178 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3179 3180 You can call MatGetInfo() to get information on how effective the preallocation was; 3181 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3182 You can also run with the option -info and look for messages with the string 3183 malloc in them to see if additional memory allocation was needed. 3184 3185 Example usage: 3186 3187 Consider the following 8x8 matrix with 34 non-zero values, that is 3188 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3189 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3190 as follows: 3191 3192 .vb 3193 1 2 0 | 0 3 0 | 0 4 3194 Proc0 0 5 6 | 7 0 0 | 8 0 3195 9 0 10 | 11 0 0 | 12 0 3196 ------------------------------------- 3197 13 0 14 | 15 16 17 | 0 0 3198 Proc1 0 18 0 | 19 20 21 | 0 0 3199 0 0 0 | 22 23 0 | 24 0 3200 ------------------------------------- 3201 Proc2 25 26 27 | 0 0 28 | 29 0 3202 30 0 0 | 31 32 33 | 0 34 3203 .ve 3204 3205 This can be represented as a collection of submatrices as: 3206 3207 .vb 3208 A B C 3209 D E F 3210 G H I 3211 .ve 3212 3213 Where the submatrices A,B,C are owned by proc0, D,E,F are 3214 owned by proc1, G,H,I are owned by proc2. 3215 3216 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3217 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3218 The 'M','N' parameters are 8,8, and have the same values on all procs. 3219 3220 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3221 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3222 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3223 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3224 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3225 matrix, ans [DF] as another SeqAIJ matrix. 3226 3227 When d_nz, o_nz parameters are specified, d_nz storage elements are 3228 allocated for every row of the local diagonal submatrix, and o_nz 3229 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3230 One way to choose d_nz and o_nz is to use the max nonzerors per local 3231 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3232 In this case, the values of d_nz,o_nz are: 3233 .vb 3234 proc0 : dnz = 2, o_nz = 2 3235 proc1 : dnz = 3, o_nz = 2 3236 proc2 : dnz = 1, o_nz = 4 3237 .ve 3238 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3239 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3240 for proc3. i.e we are using 12+15+10=37 storage locations to store 3241 34 values. 3242 3243 When d_nnz, o_nnz parameters are specified, the storage is specified 3244 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3245 In the above case the values for d_nnz,o_nnz are: 3246 .vb 3247 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3248 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3249 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3250 .ve 3251 Here the space allocated is sum of all the above values i.e 34, and 3252 hence pre-allocation is perfect. 3253 3254 Level: intermediate 3255 3256 .keywords: matrix, aij, compressed row, sparse, parallel 3257 3258 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3259 MPIAIJ, MatGetInfo() 3260 @*/ 3261 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3262 { 3263 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3264 3265 PetscFunctionBegin; 3266 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3267 if (f) { 3268 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3269 } 3270 PetscFunctionReturn(0); 3271 } 3272 3273 #undef __FUNCT__ 3274 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3275 /*@ 3276 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3277 CSR format the local rows. 3278 3279 Collective on MPI_Comm 3280 3281 Input Parameters: 3282 + comm - MPI communicator 3283 . m - number of local rows (Cannot be PETSC_DECIDE) 3284 . n - This value should be the same as the local size used in creating the 3285 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3286 calculated if N is given) For square matrices n is almost always m. 3287 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3288 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3289 . i - row indices 3290 . j - column indices 3291 - a - matrix values 3292 3293 Output Parameter: 3294 . mat - the matrix 3295 3296 Level: intermediate 3297 3298 Notes: 3299 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3300 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3301 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3302 3303 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3304 3305 The format which is used for the sparse matrix input, is equivalent to a 3306 row-major ordering.. i.e for the following matrix, the input data expected is 3307 as shown: 3308 3309 1 0 0 3310 2 0 3 P0 3311 ------- 3312 4 5 6 P1 3313 3314 Process0 [P0]: rows_owned=[0,1] 3315 i = {0,1,3} [size = nrow+1 = 2+1] 3316 j = {0,0,2} [size = nz = 6] 3317 v = {1,2,3} [size = nz = 6] 3318 3319 Process1 [P1]: rows_owned=[2] 3320 i = {0,3} [size = nrow+1 = 1+1] 3321 j = {0,1,2} [size = nz = 6] 3322 v = {4,5,6} [size = nz = 6] 3323 3324 .keywords: matrix, aij, compressed row, sparse, parallel 3325 3326 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3327 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3328 @*/ 3329 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3330 { 3331 PetscErrorCode ierr; 3332 3333 PetscFunctionBegin; 3334 if (i[0]) { 3335 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3336 } 3337 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3338 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3339 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3340 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3341 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3342 PetscFunctionReturn(0); 3343 } 3344 3345 #undef __FUNCT__ 3346 #define __FUNCT__ "MatCreateMPIAIJ" 3347 /*@C 3348 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3349 (the default parallel PETSc format). For good matrix assembly performance 3350 the user should preallocate the matrix storage by setting the parameters 3351 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3352 performance can be increased by more than a factor of 50. 3353 3354 Collective on MPI_Comm 3355 3356 Input Parameters: 3357 + comm - MPI communicator 3358 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3359 This value should be the same as the local size used in creating the 3360 y vector for the matrix-vector product y = Ax. 3361 . n - This value should be the same as the local size used in creating the 3362 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3363 calculated if N is given) For square matrices n is almost always m. 3364 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3365 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3366 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3367 (same value is used for all local rows) 3368 . d_nnz - array containing the number of nonzeros in the various rows of the 3369 DIAGONAL portion of the local submatrix (possibly different for each row) 3370 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3371 The size of this array is equal to the number of local rows, i.e 'm'. 3372 You must leave room for the diagonal entry even if it is zero. 3373 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3374 submatrix (same value is used for all local rows). 3375 - o_nnz - array containing the number of nonzeros in the various rows of the 3376 OFF-DIAGONAL portion of the local submatrix (possibly different for 3377 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3378 structure. The size of this array is equal to the number 3379 of local rows, i.e 'm'. 3380 3381 Output Parameter: 3382 . A - the matrix 3383 3384 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3385 MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely 3386 true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles. 3387 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3388 3389 Notes: 3390 If the *_nnz parameter is given then the *_nz parameter is ignored 3391 3392 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3393 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3394 storage requirements for this matrix. 3395 3396 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3397 processor than it must be used on all processors that share the object for 3398 that argument. 3399 3400 The user MUST specify either the local or global matrix dimensions 3401 (possibly both). 3402 3403 The parallel matrix is partitioned across processors such that the 3404 first m0 rows belong to process 0, the next m1 rows belong to 3405 process 1, the next m2 rows belong to process 2 etc.. where 3406 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3407 values corresponding to [m x N] submatrix. 3408 3409 The columns are logically partitioned with the n0 columns belonging 3410 to 0th partition, the next n1 columns belonging to the next 3411 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 3412 3413 The DIAGONAL portion of the local submatrix on any given processor 3414 is the submatrix corresponding to the rows and columns m,n 3415 corresponding to the given processor. i.e diagonal matrix on 3416 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3417 etc. The remaining portion of the local submatrix [m x (N-n)] 3418 constitute the OFF-DIAGONAL portion. The example below better 3419 illustrates this concept. 3420 3421 For a square global matrix we define each processor's diagonal portion 3422 to be its local rows and the corresponding columns (a square submatrix); 3423 each processor's off-diagonal portion encompasses the remainder of the 3424 local matrix (a rectangular submatrix). 3425 3426 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3427 3428 When calling this routine with a single process communicator, a matrix of 3429 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3430 type of communicator, use the construction mechanism: 3431 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 3432 3433 By default, this format uses inodes (identical nodes) when possible. 3434 We search for consecutive rows with the same nonzero structure, thereby 3435 reusing matrix information to achieve increased efficiency. 3436 3437 Options Database Keys: 3438 + -mat_no_inode - Do not use inodes 3439 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3440 - -mat_aij_oneindex - Internally use indexing starting at 1 3441 rather than 0. Note that when calling MatSetValues(), 3442 the user still MUST index entries starting at 0! 3443 3444 3445 Example usage: 3446 3447 Consider the following 8x8 matrix with 34 non-zero values, that is 3448 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3449 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3450 as follows: 3451 3452 .vb 3453 1 2 0 | 0 3 0 | 0 4 3454 Proc0 0 5 6 | 7 0 0 | 8 0 3455 9 0 10 | 11 0 0 | 12 0 3456 ------------------------------------- 3457 13 0 14 | 15 16 17 | 0 0 3458 Proc1 0 18 0 | 19 20 21 | 0 0 3459 0 0 0 | 22 23 0 | 24 0 3460 ------------------------------------- 3461 Proc2 25 26 27 | 0 0 28 | 29 0 3462 30 0 0 | 31 32 33 | 0 34 3463 .ve 3464 3465 This can be represented as a collection of submatrices as: 3466 3467 .vb 3468 A B C 3469 D E F 3470 G H I 3471 .ve 3472 3473 Where the submatrices A,B,C are owned by proc0, D,E,F are 3474 owned by proc1, G,H,I are owned by proc2. 3475 3476 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3477 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3478 The 'M','N' parameters are 8,8, and have the same values on all procs. 3479 3480 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3481 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3482 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3483 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3484 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3485 matrix, ans [DF] as another SeqAIJ matrix. 3486 3487 When d_nz, o_nz parameters are specified, d_nz storage elements are 3488 allocated for every row of the local diagonal submatrix, and o_nz 3489 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3490 One way to choose d_nz and o_nz is to use the max nonzerors per local 3491 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3492 In this case, the values of d_nz,o_nz are: 3493 .vb 3494 proc0 : dnz = 2, o_nz = 2 3495 proc1 : dnz = 3, o_nz = 2 3496 proc2 : dnz = 1, o_nz = 4 3497 .ve 3498 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3499 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3500 for proc3. i.e we are using 12+15+10=37 storage locations to store 3501 34 values. 3502 3503 When d_nnz, o_nnz parameters are specified, the storage is specified 3504 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3505 In the above case the values for d_nnz,o_nnz are: 3506 .vb 3507 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3508 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3509 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3510 .ve 3511 Here the space allocated is sum of all the above values i.e 34, and 3512 hence pre-allocation is perfect. 3513 3514 Level: intermediate 3515 3516 .keywords: matrix, aij, compressed row, sparse, parallel 3517 3518 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3519 MPIAIJ, MatCreateMPIAIJWithArrays() 3520 @*/ 3521 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3522 { 3523 PetscErrorCode ierr; 3524 PetscMPIInt size; 3525 3526 PetscFunctionBegin; 3527 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3528 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3529 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3530 if (size > 1) { 3531 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3532 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3533 } else { 3534 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3535 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3536 } 3537 PetscFunctionReturn(0); 3538 } 3539 3540 #undef __FUNCT__ 3541 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3542 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3543 { 3544 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3545 3546 PetscFunctionBegin; 3547 *Ad = a->A; 3548 *Ao = a->B; 3549 *colmap = a->garray; 3550 PetscFunctionReturn(0); 3551 } 3552 3553 #undef __FUNCT__ 3554 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3555 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3556 { 3557 PetscErrorCode ierr; 3558 PetscInt i; 3559 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3560 3561 PetscFunctionBegin; 3562 if (coloring->ctype == IS_COLORING_GLOBAL) { 3563 ISColoringValue *allcolors,*colors; 3564 ISColoring ocoloring; 3565 3566 /* set coloring for diagonal portion */ 3567 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3568 3569 /* set coloring for off-diagonal portion */ 3570 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3571 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3572 for (i=0; i<a->B->cmap.n; i++) { 3573 colors[i] = allcolors[a->garray[i]]; 3574 } 3575 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3576 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3577 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3578 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3579 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3580 ISColoringValue *colors; 3581 PetscInt *larray; 3582 ISColoring ocoloring; 3583 3584 /* set coloring for diagonal portion */ 3585 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3586 for (i=0; i<a->A->cmap.n; i++) { 3587 larray[i] = i + A->cmap.rstart; 3588 } 3589 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3590 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3591 for (i=0; i<a->A->cmap.n; i++) { 3592 colors[i] = coloring->colors[larray[i]]; 3593 } 3594 ierr = PetscFree(larray);CHKERRQ(ierr); 3595 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3596 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3597 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3598 3599 /* set coloring for off-diagonal portion */ 3600 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3601 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3602 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3603 for (i=0; i<a->B->cmap.n; i++) { 3604 colors[i] = coloring->colors[larray[i]]; 3605 } 3606 ierr = PetscFree(larray);CHKERRQ(ierr); 3607 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3608 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3609 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3610 } else { 3611 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3612 } 3613 3614 PetscFunctionReturn(0); 3615 } 3616 3617 #if defined(PETSC_HAVE_ADIC) 3618 #undef __FUNCT__ 3619 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3620 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3621 { 3622 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3623 PetscErrorCode ierr; 3624 3625 PetscFunctionBegin; 3626 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3627 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3628 PetscFunctionReturn(0); 3629 } 3630 #endif 3631 3632 #undef __FUNCT__ 3633 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3634 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3635 { 3636 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3637 PetscErrorCode ierr; 3638 3639 PetscFunctionBegin; 3640 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3641 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3642 PetscFunctionReturn(0); 3643 } 3644 3645 #undef __FUNCT__ 3646 #define __FUNCT__ "MatMerge" 3647 /*@ 3648 MatMerge - Creates a single large PETSc matrix by concatinating sequential 3649 matrices from each processor 3650 3651 Collective on MPI_Comm 3652 3653 Input Parameters: 3654 + comm - the communicators the parallel matrix will live on 3655 . inmat - the input sequential matrices 3656 . n - number of local columns (or PETSC_DECIDE) 3657 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3658 3659 Output Parameter: 3660 . outmat - the parallel matrix generated 3661 3662 Level: advanced 3663 3664 Notes: The number of columns of the matrix in EACH processor MUST be the same. 3665 3666 @*/ 3667 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3668 { 3669 PetscErrorCode ierr; 3670 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3671 PetscInt *indx; 3672 PetscScalar *values; 3673 3674 PetscFunctionBegin; 3675 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3676 if (scall == MAT_INITIAL_MATRIX){ 3677 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3678 if (n == PETSC_DECIDE){ 3679 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3680 } 3681 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3682 rstart -= m; 3683 3684 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3685 for (i=0;i<m;i++) { 3686 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3687 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3688 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3689 } 3690 /* This routine will ONLY return MPIAIJ type matrix */ 3691 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3692 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3693 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3694 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3695 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3696 3697 } else if (scall == MAT_REUSE_MATRIX){ 3698 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3699 } else { 3700 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3701 } 3702 3703 for (i=0;i<m;i++) { 3704 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3705 Ii = i + rstart; 3706 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3707 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3708 } 3709 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3710 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3711 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3712 3713 PetscFunctionReturn(0); 3714 } 3715 3716 #undef __FUNCT__ 3717 #define __FUNCT__ "MatFileSplit" 3718 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3719 { 3720 PetscErrorCode ierr; 3721 PetscMPIInt rank; 3722 PetscInt m,N,i,rstart,nnz; 3723 size_t len; 3724 const PetscInt *indx; 3725 PetscViewer out; 3726 char *name; 3727 Mat B; 3728 const PetscScalar *values; 3729 3730 PetscFunctionBegin; 3731 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3732 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3733 /* Should this be the type of the diagonal block of A? */ 3734 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3735 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3736 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3737 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3738 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3739 for (i=0;i<m;i++) { 3740 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3741 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3742 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3743 } 3744 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3745 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3746 3747 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 3748 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3749 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3750 sprintf(name,"%s.%d",outfile,rank); 3751 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3752 ierr = PetscFree(name); 3753 ierr = MatView(B,out);CHKERRQ(ierr); 3754 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3755 ierr = MatDestroy(B);CHKERRQ(ierr); 3756 PetscFunctionReturn(0); 3757 } 3758 3759 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3760 #undef __FUNCT__ 3761 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3762 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3763 { 3764 PetscErrorCode ierr; 3765 Mat_Merge_SeqsToMPI *merge; 3766 PetscContainer container; 3767 3768 PetscFunctionBegin; 3769 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3770 if (container) { 3771 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3772 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3773 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3774 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3775 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3776 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3777 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3778 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3779 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3780 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3781 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3782 ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr); 3783 3784 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 3785 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3786 } 3787 ierr = PetscFree(merge);CHKERRQ(ierr); 3788 3789 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3790 PetscFunctionReturn(0); 3791 } 3792 3793 #include "src/mat/utils/freespace.h" 3794 #include "petscbt.h" 3795 3796 #undef __FUNCT__ 3797 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 3798 /*@C 3799 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 3800 matrices from each processor 3801 3802 Collective on MPI_Comm 3803 3804 Input Parameters: 3805 + comm - the communicators the parallel matrix will live on 3806 . seqmat - the input sequential matrices 3807 . m - number of local rows (or PETSC_DECIDE) 3808 . n - number of local columns (or PETSC_DECIDE) 3809 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3810 3811 Output Parameter: 3812 . mpimat - the parallel matrix generated 3813 3814 Level: advanced 3815 3816 Notes: 3817 The dimensions of the sequential matrix in each processor MUST be the same. 3818 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 3819 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 3820 @*/ 3821 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 3822 { 3823 PetscErrorCode ierr; 3824 MPI_Comm comm=((PetscObject)mpimat)->comm; 3825 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3826 PetscMPIInt size,rank,taga,*len_s; 3827 PetscInt N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j; 3828 PetscInt proc,m; 3829 PetscInt **buf_ri,**buf_rj; 3830 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3831 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3832 MPI_Request *s_waits,*r_waits; 3833 MPI_Status *status; 3834 MatScalar *aa=a->a; 3835 MatScalar **abuf_r,*ba_i; 3836 Mat_Merge_SeqsToMPI *merge; 3837 PetscContainer container; 3838 3839 PetscFunctionBegin; 3840 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3841 3842 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3843 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3844 3845 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3846 if (container) { 3847 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3848 } 3849 bi = merge->bi; 3850 bj = merge->bj; 3851 buf_ri = merge->buf_ri; 3852 buf_rj = merge->buf_rj; 3853 3854 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3855 owners = merge->rowmap.range; 3856 len_s = merge->len_s; 3857 3858 /* send and recv matrix values */ 3859 /*-----------------------------*/ 3860 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3861 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3862 3863 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 3864 for (proc=0,k=0; proc<size; proc++){ 3865 if (!len_s[proc]) continue; 3866 i = owners[proc]; 3867 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 3868 k++; 3869 } 3870 3871 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 3872 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 3873 ierr = PetscFree(status);CHKERRQ(ierr); 3874 3875 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3876 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3877 3878 /* insert mat values of mpimat */ 3879 /*----------------------------*/ 3880 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 3881 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3882 nextrow = buf_ri_k + merge->nrecv; 3883 nextai = nextrow + merge->nrecv; 3884 3885 for (k=0; k<merge->nrecv; k++){ 3886 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3887 nrows = *(buf_ri_k[k]); 3888 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 3889 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3890 } 3891 3892 /* set values of ba */ 3893 m = merge->rowmap.n; 3894 for (i=0; i<m; i++) { 3895 arow = owners[rank] + i; 3896 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 3897 bnzi = bi[i+1] - bi[i]; 3898 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 3899 3900 /* add local non-zero vals of this proc's seqmat into ba */ 3901 anzi = ai[arow+1] - ai[arow]; 3902 aj = a->j + ai[arow]; 3903 aa = a->a + ai[arow]; 3904 nextaj = 0; 3905 for (j=0; nextaj<anzi; j++){ 3906 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3907 ba_i[j] += aa[nextaj++]; 3908 } 3909 } 3910 3911 /* add received vals into ba */ 3912 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3913 /* i-th row */ 3914 if (i == *nextrow[k]) { 3915 anzi = *(nextai[k]+1) - *nextai[k]; 3916 aj = buf_rj[k] + *(nextai[k]); 3917 aa = abuf_r[k] + *(nextai[k]); 3918 nextaj = 0; 3919 for (j=0; nextaj<anzi; j++){ 3920 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3921 ba_i[j] += aa[nextaj++]; 3922 } 3923 } 3924 nextrow[k]++; nextai[k]++; 3925 } 3926 } 3927 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3928 } 3929 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3930 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3931 3932 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3933 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3934 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3935 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3936 PetscFunctionReturn(0); 3937 } 3938 3939 #undef __FUNCT__ 3940 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 3941 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3942 { 3943 PetscErrorCode ierr; 3944 Mat B_mpi; 3945 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3946 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3947 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3948 PetscInt M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j; 3949 PetscInt len,proc,*dnz,*onz; 3950 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3951 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3952 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3953 MPI_Status *status; 3954 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3955 PetscBT lnkbt; 3956 Mat_Merge_SeqsToMPI *merge; 3957 PetscContainer container; 3958 3959 PetscFunctionBegin; 3960 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3961 3962 /* make sure it is a PETSc comm */ 3963 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 3964 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3965 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3966 3967 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3968 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3969 3970 /* determine row ownership */ 3971 /*---------------------------------------------------------*/ 3972 ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 3973 merge->rowmap.n = m; 3974 merge->rowmap.N = M; 3975 merge->rowmap.bs = 1; 3976 ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr); 3977 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3978 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3979 3980 m = merge->rowmap.n; 3981 M = merge->rowmap.N; 3982 owners = merge->rowmap.range; 3983 3984 /* determine the number of messages to send, their lengths */ 3985 /*---------------------------------------------------------*/ 3986 len_s = merge->len_s; 3987 3988 len = 0; /* length of buf_si[] */ 3989 merge->nsend = 0; 3990 for (proc=0; proc<size; proc++){ 3991 len_si[proc] = 0; 3992 if (proc == rank){ 3993 len_s[proc] = 0; 3994 } else { 3995 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3996 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3997 } 3998 if (len_s[proc]) { 3999 merge->nsend++; 4000 nrows = 0; 4001 for (i=owners[proc]; i<owners[proc+1]; i++){ 4002 if (ai[i+1] > ai[i]) nrows++; 4003 } 4004 len_si[proc] = 2*(nrows+1); 4005 len += len_si[proc]; 4006 } 4007 } 4008 4009 /* determine the number and length of messages to receive for ij-structure */ 4010 /*-------------------------------------------------------------------------*/ 4011 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4012 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4013 4014 /* post the Irecv of j-structure */ 4015 /*-------------------------------*/ 4016 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4017 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4018 4019 /* post the Isend of j-structure */ 4020 /*--------------------------------*/ 4021 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 4022 sj_waits = si_waits + merge->nsend; 4023 4024 for (proc=0, k=0; proc<size; proc++){ 4025 if (!len_s[proc]) continue; 4026 i = owners[proc]; 4027 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4028 k++; 4029 } 4030 4031 /* receives and sends of j-structure are complete */ 4032 /*------------------------------------------------*/ 4033 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4034 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4035 4036 /* send and recv i-structure */ 4037 /*---------------------------*/ 4038 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4039 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4040 4041 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4042 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4043 for (proc=0,k=0; proc<size; proc++){ 4044 if (!len_s[proc]) continue; 4045 /* form outgoing message for i-structure: 4046 buf_si[0]: nrows to be sent 4047 [1:nrows]: row index (global) 4048 [nrows+1:2*nrows+1]: i-structure index 4049 */ 4050 /*-------------------------------------------*/ 4051 nrows = len_si[proc]/2 - 1; 4052 buf_si_i = buf_si + nrows+1; 4053 buf_si[0] = nrows; 4054 buf_si_i[0] = 0; 4055 nrows = 0; 4056 for (i=owners[proc]; i<owners[proc+1]; i++){ 4057 anzi = ai[i+1] - ai[i]; 4058 if (anzi) { 4059 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4060 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4061 nrows++; 4062 } 4063 } 4064 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4065 k++; 4066 buf_si += len_si[proc]; 4067 } 4068 4069 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4070 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4071 4072 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4073 for (i=0; i<merge->nrecv; i++){ 4074 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4075 } 4076 4077 ierr = PetscFree(len_si);CHKERRQ(ierr); 4078 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4079 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4080 ierr = PetscFree(si_waits);CHKERRQ(ierr); 4081 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4082 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4083 ierr = PetscFree(status);CHKERRQ(ierr); 4084 4085 /* compute a local seq matrix in each processor */ 4086 /*----------------------------------------------*/ 4087 /* allocate bi array and free space for accumulating nonzero column info */ 4088 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4089 bi[0] = 0; 4090 4091 /* create and initialize a linked list */ 4092 nlnk = N+1; 4093 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4094 4095 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4096 len = 0; 4097 len = ai[owners[rank+1]] - ai[owners[rank]]; 4098 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4099 current_space = free_space; 4100 4101 /* determine symbolic info for each local row */ 4102 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 4103 nextrow = buf_ri_k + merge->nrecv; 4104 nextai = nextrow + merge->nrecv; 4105 for (k=0; k<merge->nrecv; k++){ 4106 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4107 nrows = *buf_ri_k[k]; 4108 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4109 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4110 } 4111 4112 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4113 len = 0; 4114 for (i=0;i<m;i++) { 4115 bnzi = 0; 4116 /* add local non-zero cols of this proc's seqmat into lnk */ 4117 arow = owners[rank] + i; 4118 anzi = ai[arow+1] - ai[arow]; 4119 aj = a->j + ai[arow]; 4120 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4121 bnzi += nlnk; 4122 /* add received col data into lnk */ 4123 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4124 if (i == *nextrow[k]) { /* i-th row */ 4125 anzi = *(nextai[k]+1) - *nextai[k]; 4126 aj = buf_rj[k] + *nextai[k]; 4127 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4128 bnzi += nlnk; 4129 nextrow[k]++; nextai[k]++; 4130 } 4131 } 4132 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4133 4134 /* if free space is not available, make more free space */ 4135 if (current_space->local_remaining<bnzi) { 4136 ierr = PetscFreeSpaceGet(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4137 nspacedouble++; 4138 } 4139 /* copy data into free space, then initialize lnk */ 4140 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4141 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4142 4143 current_space->array += bnzi; 4144 current_space->local_used += bnzi; 4145 current_space->local_remaining -= bnzi; 4146 4147 bi[i+1] = bi[i] + bnzi; 4148 } 4149 4150 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 4151 4152 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4153 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4154 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4155 4156 /* create symbolic parallel matrix B_mpi */ 4157 /*---------------------------------------*/ 4158 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4159 if (n==PETSC_DECIDE) { 4160 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4161 } else { 4162 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4163 } 4164 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4165 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4166 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4167 4168 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4169 B_mpi->assembled = PETSC_FALSE; 4170 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4171 merge->bi = bi; 4172 merge->bj = bj; 4173 merge->buf_ri = buf_ri; 4174 merge->buf_rj = buf_rj; 4175 merge->coi = PETSC_NULL; 4176 merge->coj = PETSC_NULL; 4177 merge->owners_co = PETSC_NULL; 4178 4179 /* attach the supporting struct to B_mpi for reuse */ 4180 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4181 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4182 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4183 *mpimat = B_mpi; 4184 4185 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4186 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4187 PetscFunctionReturn(0); 4188 } 4189 4190 #undef __FUNCT__ 4191 #define __FUNCT__ "MatMerge_SeqsToMPI" 4192 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4193 { 4194 PetscErrorCode ierr; 4195 4196 PetscFunctionBegin; 4197 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4198 if (scall == MAT_INITIAL_MATRIX){ 4199 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4200 } 4201 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4202 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4203 PetscFunctionReturn(0); 4204 } 4205 4206 #undef __FUNCT__ 4207 #define __FUNCT__ "MatGetLocalMat" 4208 /*@ 4209 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4210 4211 Not Collective 4212 4213 Input Parameters: 4214 + A - the matrix 4215 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4216 4217 Output Parameter: 4218 . A_loc - the local sequential matrix generated 4219 4220 Level: developer 4221 4222 @*/ 4223 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4224 { 4225 PetscErrorCode ierr; 4226 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4227 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4228 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4229 MatScalar *aa=a->a,*ba=b->a,*cam; 4230 PetscScalar *ca; 4231 PetscInt am=A->rmap.n,i,j,k,cstart=A->cmap.rstart; 4232 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4233 4234 PetscFunctionBegin; 4235 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4236 if (scall == MAT_INITIAL_MATRIX){ 4237 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4238 ci[0] = 0; 4239 for (i=0; i<am; i++){ 4240 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4241 } 4242 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4243 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4244 k = 0; 4245 for (i=0; i<am; i++) { 4246 ncols_o = bi[i+1] - bi[i]; 4247 ncols_d = ai[i+1] - ai[i]; 4248 /* off-diagonal portion of A */ 4249 for (jo=0; jo<ncols_o; jo++) { 4250 col = cmap[*bj]; 4251 if (col >= cstart) break; 4252 cj[k] = col; bj++; 4253 ca[k++] = *ba++; 4254 } 4255 /* diagonal portion of A */ 4256 for (j=0; j<ncols_d; j++) { 4257 cj[k] = cstart + *aj++; 4258 ca[k++] = *aa++; 4259 } 4260 /* off-diagonal portion of A */ 4261 for (j=jo; j<ncols_o; j++) { 4262 cj[k] = cmap[*bj++]; 4263 ca[k++] = *ba++; 4264 } 4265 } 4266 /* put together the new matrix */ 4267 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4268 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4269 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4270 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4271 mat->free_a = PETSC_TRUE; 4272 mat->free_ij = PETSC_TRUE; 4273 mat->nonew = 0; 4274 } else if (scall == MAT_REUSE_MATRIX){ 4275 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4276 ci = mat->i; cj = mat->j; cam = mat->a; 4277 for (i=0; i<am; i++) { 4278 /* off-diagonal portion of A */ 4279 ncols_o = bi[i+1] - bi[i]; 4280 for (jo=0; jo<ncols_o; jo++) { 4281 col = cmap[*bj]; 4282 if (col >= cstart) break; 4283 *cam++ = *ba++; bj++; 4284 } 4285 /* diagonal portion of A */ 4286 ncols_d = ai[i+1] - ai[i]; 4287 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4288 /* off-diagonal portion of A */ 4289 for (j=jo; j<ncols_o; j++) { 4290 *cam++ = *ba++; bj++; 4291 } 4292 } 4293 } else { 4294 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4295 } 4296 4297 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4298 PetscFunctionReturn(0); 4299 } 4300 4301 #undef __FUNCT__ 4302 #define __FUNCT__ "MatGetLocalMatCondensed" 4303 /*@C 4304 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4305 4306 Not Collective 4307 4308 Input Parameters: 4309 + A - the matrix 4310 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4311 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4312 4313 Output Parameter: 4314 . A_loc - the local sequential matrix generated 4315 4316 Level: developer 4317 4318 @*/ 4319 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4320 { 4321 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4322 PetscErrorCode ierr; 4323 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4324 IS isrowa,iscola; 4325 Mat *aloc; 4326 4327 PetscFunctionBegin; 4328 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4329 if (!row){ 4330 start = A->rmap.rstart; end = A->rmap.rend; 4331 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4332 } else { 4333 isrowa = *row; 4334 } 4335 if (!col){ 4336 start = A->cmap.rstart; 4337 cmap = a->garray; 4338 nzA = a->A->cmap.n; 4339 nzB = a->B->cmap.n; 4340 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4341 ncols = 0; 4342 for (i=0; i<nzB; i++) { 4343 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4344 else break; 4345 } 4346 imark = i; 4347 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4348 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4349 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4350 ierr = PetscFree(idx);CHKERRQ(ierr); 4351 } else { 4352 iscola = *col; 4353 } 4354 if (scall != MAT_INITIAL_MATRIX){ 4355 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4356 aloc[0] = *A_loc; 4357 } 4358 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4359 *A_loc = aloc[0]; 4360 ierr = PetscFree(aloc);CHKERRQ(ierr); 4361 if (!row){ 4362 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4363 } 4364 if (!col){ 4365 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4366 } 4367 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4368 PetscFunctionReturn(0); 4369 } 4370 4371 #undef __FUNCT__ 4372 #define __FUNCT__ "MatGetBrowsOfAcols" 4373 /*@C 4374 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4375 4376 Collective on Mat 4377 4378 Input Parameters: 4379 + A,B - the matrices in mpiaij format 4380 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4381 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4382 4383 Output Parameter: 4384 + rowb, colb - index sets of rows and columns of B to extract 4385 . brstart - row index of B_seq from which next B->rmap.n rows are taken from B's local rows 4386 - B_seq - the sequential matrix generated 4387 4388 Level: developer 4389 4390 @*/ 4391 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4392 { 4393 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4394 PetscErrorCode ierr; 4395 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4396 IS isrowb,iscolb; 4397 Mat *bseq; 4398 4399 PetscFunctionBegin; 4400 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 4401 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 4402 } 4403 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4404 4405 if (scall == MAT_INITIAL_MATRIX){ 4406 start = A->cmap.rstart; 4407 cmap = a->garray; 4408 nzA = a->A->cmap.n; 4409 nzB = a->B->cmap.n; 4410 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4411 ncols = 0; 4412 for (i=0; i<nzB; i++) { /* row < local row index */ 4413 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4414 else break; 4415 } 4416 imark = i; 4417 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4418 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4419 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4420 ierr = PetscFree(idx);CHKERRQ(ierr); 4421 *brstart = imark; 4422 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr); 4423 } else { 4424 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4425 isrowb = *rowb; iscolb = *colb; 4426 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4427 bseq[0] = *B_seq; 4428 } 4429 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4430 *B_seq = bseq[0]; 4431 ierr = PetscFree(bseq);CHKERRQ(ierr); 4432 if (!rowb){ 4433 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4434 } else { 4435 *rowb = isrowb; 4436 } 4437 if (!colb){ 4438 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4439 } else { 4440 *colb = iscolb; 4441 } 4442 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4443 PetscFunctionReturn(0); 4444 } 4445 4446 #undef __FUNCT__ 4447 #define __FUNCT__ "MatGetBrowsOfAoCols" 4448 /*@C 4449 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4450 of the OFF-DIAGONAL portion of local A 4451 4452 Collective on Mat 4453 4454 Input Parameters: 4455 + A,B - the matrices in mpiaij format 4456 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4457 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4458 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4459 4460 Output Parameter: 4461 + B_oth - the sequential matrix generated 4462 4463 Level: developer 4464 4465 @*/ 4466 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth) 4467 { 4468 VecScatter_MPI_General *gen_to,*gen_from; 4469 PetscErrorCode ierr; 4470 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4471 Mat_SeqAIJ *b_oth; 4472 VecScatter ctx=a->Mvctx; 4473 MPI_Comm comm=((PetscObject)ctx)->comm; 4474 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4475 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj; 4476 PetscScalar *rvalues,*svalues; 4477 MatScalar *b_otha,*bufa,*bufA; 4478 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4479 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4480 MPI_Status *sstatus,rstatus; 4481 PetscMPIInt jj; 4482 PetscInt *cols,sbs,rbs; 4483 PetscScalar *vals; 4484 4485 PetscFunctionBegin; 4486 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 4487 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 4488 } 4489 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4490 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4491 4492 gen_to = (VecScatter_MPI_General*)ctx->todata; 4493 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4494 rvalues = gen_from->values; /* holds the length of receiving row */ 4495 svalues = gen_to->values; /* holds the length of sending row */ 4496 nrecvs = gen_from->n; 4497 nsends = gen_to->n; 4498 4499 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4500 srow = gen_to->indices; /* local row index to be sent */ 4501 sstarts = gen_to->starts; 4502 sprocs = gen_to->procs; 4503 sstatus = gen_to->sstatus; 4504 sbs = gen_to->bs; 4505 rstarts = gen_from->starts; 4506 rprocs = gen_from->procs; 4507 rbs = gen_from->bs; 4508 4509 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4510 if (scall == MAT_INITIAL_MATRIX){ 4511 /* i-array */ 4512 /*---------*/ 4513 /* post receives */ 4514 for (i=0; i<nrecvs; i++){ 4515 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4516 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4517 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4518 } 4519 4520 /* pack the outgoing message */ 4521 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 4522 rstartsj = sstartsj + nsends +1; 4523 sstartsj[0] = 0; rstartsj[0] = 0; 4524 len = 0; /* total length of j or a array to be sent */ 4525 k = 0; 4526 for (i=0; i<nsends; i++){ 4527 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4528 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4529 for (j=0; j<nrows; j++) { 4530 row = srow[k] + B->rmap.range[rank]; /* global row idx */ 4531 for (l=0; l<sbs; l++){ 4532 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4533 rowlen[j*sbs+l] = ncols; 4534 len += ncols; 4535 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4536 } 4537 k++; 4538 } 4539 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4540 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4541 } 4542 /* recvs and sends of i-array are completed */ 4543 i = nrecvs; 4544 while (i--) { 4545 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4546 } 4547 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4548 4549 /* allocate buffers for sending j and a arrays */ 4550 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4551 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4552 4553 /* create i-array of B_oth */ 4554 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4555 b_othi[0] = 0; 4556 len = 0; /* total length of j or a array to be received */ 4557 k = 0; 4558 for (i=0; i<nrecvs; i++){ 4559 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4560 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 4561 for (j=0; j<nrows; j++) { 4562 b_othi[k+1] = b_othi[k] + rowlen[j]; 4563 len += rowlen[j]; k++; 4564 } 4565 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4566 } 4567 4568 /* allocate space for j and a arrrays of B_oth */ 4569 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4570 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 4571 4572 /* j-array */ 4573 /*---------*/ 4574 /* post receives of j-array */ 4575 for (i=0; i<nrecvs; i++){ 4576 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4577 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4578 } 4579 4580 /* pack the outgoing message j-array */ 4581 k = 0; 4582 for (i=0; i<nsends; i++){ 4583 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4584 bufJ = bufj+sstartsj[i]; 4585 for (j=0; j<nrows; j++) { 4586 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 4587 for (ll=0; ll<sbs; ll++){ 4588 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4589 for (l=0; l<ncols; l++){ 4590 *bufJ++ = cols[l]; 4591 } 4592 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4593 } 4594 } 4595 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4596 } 4597 4598 /* recvs and sends of j-array are completed */ 4599 i = nrecvs; 4600 while (i--) { 4601 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4602 } 4603 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4604 } else if (scall == MAT_REUSE_MATRIX){ 4605 sstartsj = *startsj; 4606 rstartsj = sstartsj + nsends +1; 4607 bufa = *bufa_ptr; 4608 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4609 b_otha = b_oth->a; 4610 } else { 4611 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 4612 } 4613 4614 /* a-array */ 4615 /*---------*/ 4616 /* post receives of a-array */ 4617 for (i=0; i<nrecvs; i++){ 4618 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4619 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4620 } 4621 4622 /* pack the outgoing message a-array */ 4623 k = 0; 4624 for (i=0; i<nsends; i++){ 4625 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4626 bufA = bufa+sstartsj[i]; 4627 for (j=0; j<nrows; j++) { 4628 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 4629 for (ll=0; ll<sbs; ll++){ 4630 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4631 for (l=0; l<ncols; l++){ 4632 *bufA++ = vals[l]; 4633 } 4634 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4635 } 4636 } 4637 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4638 } 4639 /* recvs and sends of a-array are completed */ 4640 i = nrecvs; 4641 while (i--) { 4642 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4643 } 4644 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4645 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4646 4647 if (scall == MAT_INITIAL_MATRIX){ 4648 /* put together the new matrix */ 4649 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4650 4651 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4652 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4653 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4654 b_oth->free_a = PETSC_TRUE; 4655 b_oth->free_ij = PETSC_TRUE; 4656 b_oth->nonew = 0; 4657 4658 ierr = PetscFree(bufj);CHKERRQ(ierr); 4659 if (!startsj || !bufa_ptr){ 4660 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 4661 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4662 } else { 4663 *startsj = sstartsj; 4664 *bufa_ptr = bufa; 4665 } 4666 } 4667 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4668 PetscFunctionReturn(0); 4669 } 4670 4671 #undef __FUNCT__ 4672 #define __FUNCT__ "MatGetCommunicationStructs" 4673 /*@C 4674 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4675 4676 Not Collective 4677 4678 Input Parameters: 4679 . A - The matrix in mpiaij format 4680 4681 Output Parameter: 4682 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4683 . colmap - A map from global column index to local index into lvec 4684 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4685 4686 Level: developer 4687 4688 @*/ 4689 #if defined (PETSC_USE_CTABLE) 4690 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4691 #else 4692 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4693 #endif 4694 { 4695 Mat_MPIAIJ *a; 4696 4697 PetscFunctionBegin; 4698 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 4699 PetscValidPointer(lvec, 2) 4700 PetscValidPointer(colmap, 3) 4701 PetscValidPointer(multScatter, 4) 4702 a = (Mat_MPIAIJ *) A->data; 4703 if (lvec) *lvec = a->lvec; 4704 if (colmap) *colmap = a->colmap; 4705 if (multScatter) *multScatter = a->Mvctx; 4706 PetscFunctionReturn(0); 4707 } 4708 4709 EXTERN_C_BEGIN 4710 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*); 4711 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*); 4712 EXTERN_C_END 4713 4714 #include "src/mat/impls/dense/mpi/mpidense.h" 4715 4716 #undef __FUNCT__ 4717 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4718 /* 4719 Computes (B'*A')' since computing B*A directly is untenable 4720 4721 n p p 4722 ( ) ( ) ( ) 4723 m ( A ) * n ( B ) = m ( C ) 4724 ( ) ( ) ( ) 4725 4726 */ 4727 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4728 { 4729 PetscErrorCode ierr; 4730 Mat At,Bt,Ct; 4731 4732 PetscFunctionBegin; 4733 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4734 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4735 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4736 ierr = MatDestroy(At);CHKERRQ(ierr); 4737 ierr = MatDestroy(Bt);CHKERRQ(ierr); 4738 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4739 ierr = MatDestroy(Ct);CHKERRQ(ierr); 4740 PetscFunctionReturn(0); 4741 } 4742 4743 #undef __FUNCT__ 4744 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4745 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4746 { 4747 PetscErrorCode ierr; 4748 PetscInt m=A->rmap.n,n=B->cmap.n; 4749 Mat Cmat; 4750 4751 PetscFunctionBegin; 4752 if (A->cmap.n != B->rmap.n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap.n %d != B->rmap.n %d\n",A->cmap.n,B->rmap.n); 4753 ierr = MatCreate(A->hdr.comm,&Cmat);CHKERRQ(ierr); 4754 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4755 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 4756 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 4757 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4758 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4759 *C = Cmat; 4760 PetscFunctionReturn(0); 4761 } 4762 4763 /* ----------------------------------------------------------------*/ 4764 #undef __FUNCT__ 4765 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 4766 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 4767 { 4768 PetscErrorCode ierr; 4769 4770 PetscFunctionBegin; 4771 if (scall == MAT_INITIAL_MATRIX){ 4772 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 4773 } 4774 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 4775 PetscFunctionReturn(0); 4776 } 4777 4778 /*MC 4779 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 4780 4781 Options Database Keys: 4782 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 4783 4784 Level: beginner 4785 4786 .seealso: MatCreateMPIAIJ() 4787 M*/ 4788 4789 EXTERN_C_BEGIN 4790 #undef __FUNCT__ 4791 #define __FUNCT__ "MatCreate_MPIAIJ" 4792 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 4793 { 4794 Mat_MPIAIJ *b; 4795 PetscErrorCode ierr; 4796 PetscMPIInt size; 4797 4798 PetscFunctionBegin; 4799 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 4800 4801 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 4802 B->data = (void*)b; 4803 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 4804 B->factor = 0; 4805 B->rmap.bs = 1; 4806 B->assembled = PETSC_FALSE; 4807 B->mapping = 0; 4808 4809 B->insertmode = NOT_SET_VALUES; 4810 b->size = size; 4811 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 4812 4813 /* build cache for off array entries formed */ 4814 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 4815 b->donotstash = PETSC_FALSE; 4816 b->colmap = 0; 4817 b->garray = 0; 4818 b->roworiented = PETSC_TRUE; 4819 4820 /* stuff used for matrix vector multiply */ 4821 b->lvec = PETSC_NULL; 4822 b->Mvctx = PETSC_NULL; 4823 4824 /* stuff for MatGetRow() */ 4825 b->rowindices = 0; 4826 b->rowvalues = 0; 4827 b->getrowactive = PETSC_FALSE; 4828 4829 4830 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 4831 "MatStoreValues_MPIAIJ", 4832 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 4833 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 4834 "MatRetrieveValues_MPIAIJ", 4835 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 4836 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 4837 "MatGetDiagonalBlock_MPIAIJ", 4838 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 4839 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 4840 "MatIsTranspose_MPIAIJ", 4841 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 4842 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 4843 "MatMPIAIJSetPreallocation_MPIAIJ", 4844 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 4845 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 4846 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 4847 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 4848 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 4849 "MatDiagonalScaleLocal_MPIAIJ", 4850 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 4851 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C", 4852 "MatConvert_MPIAIJ_MPICSRPERM", 4853 MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr); 4854 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C", 4855 "MatConvert_MPIAIJ_MPICRL", 4856 MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr); 4857 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 4858 "MatMatMult_MPIDense_MPIAIJ", 4859 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 4860 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 4861 "MatMatMultSymbolic_MPIDense_MPIAIJ", 4862 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 4863 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 4864 "MatMatMultNumeric_MPIDense_MPIAIJ", 4865 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 4866 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 4867 PetscFunctionReturn(0); 4868 } 4869 EXTERN_C_END 4870 4871 #undef __FUNCT__ 4872 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 4873 /*@ 4874 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 4875 and "off-diagonal" part of the matrix in CSR format. 4876 4877 Collective on MPI_Comm 4878 4879 Input Parameters: 4880 + comm - MPI communicator 4881 . m - number of local rows (Cannot be PETSC_DECIDE) 4882 . n - This value should be the same as the local size used in creating the 4883 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4884 calculated if N is given) For square matrices n is almost always m. 4885 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4886 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4887 . i - row indices for "diagonal" portion of matrix 4888 . j - column indices 4889 . a - matrix values 4890 . oi - row indices for "off-diagonal" portion of matrix 4891 . oj - column indices 4892 - oa - matrix values 4893 4894 Output Parameter: 4895 . mat - the matrix 4896 4897 Level: advanced 4898 4899 Notes: 4900 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 4901 4902 The i and j indices are 0 based 4903 4904 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 4905 4906 4907 .keywords: matrix, aij, compressed row, sparse, parallel 4908 4909 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4910 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 4911 @*/ 4912 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 4913 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 4914 { 4915 PetscErrorCode ierr; 4916 Mat_MPIAIJ *maij; 4917 4918 PetscFunctionBegin; 4919 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4920 if (i[0]) { 4921 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4922 } 4923 if (oi[0]) { 4924 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 4925 } 4926 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4927 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4928 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4929 maij = (Mat_MPIAIJ*) (*mat)->data; 4930 maij->donotstash = PETSC_TRUE; 4931 (*mat)->preallocated = PETSC_TRUE; 4932 4933 (*mat)->rmap.bs = (*mat)->cmap.bs = 1; 4934 ierr = PetscMapSetUp(&(*mat)->rmap);CHKERRQ(ierr); 4935 ierr = PetscMapSetUp(&(*mat)->cmap);CHKERRQ(ierr); 4936 4937 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 4938 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 4939 4940 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4941 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4942 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4943 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4944 4945 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4946 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4947 PetscFunctionReturn(0); 4948 } 4949 4950 /* 4951 Special version for direct calls from Fortran 4952 */ 4953 #if defined(PETSC_HAVE_FORTRAN_CAPS) 4954 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 4955 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 4956 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 4957 #endif 4958 4959 /* Change these macros so can be used in void function */ 4960 #undef CHKERRQ 4961 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr) 4962 #undef SETERRQ2 4963 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr) 4964 #undef SETERRQ 4965 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr) 4966 4967 EXTERN_C_BEGIN 4968 #undef __FUNCT__ 4969 #define __FUNCT__ "matsetvaluesmpiaij_" 4970 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 4971 { 4972 Mat mat = *mmat; 4973 PetscInt m = *mm, n = *mn; 4974 InsertMode addv = *maddv; 4975 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 4976 PetscScalar value; 4977 PetscErrorCode ierr; 4978 4979 MatPreallocated(mat); 4980 if (mat->insertmode == NOT_SET_VALUES) { 4981 mat->insertmode = addv; 4982 } 4983 #if defined(PETSC_USE_DEBUG) 4984 else if (mat->insertmode != addv) { 4985 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 4986 } 4987 #endif 4988 { 4989 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 4990 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 4991 PetscTruth roworiented = aij->roworiented; 4992 4993 /* Some Variables required in the macro */ 4994 Mat A = aij->A; 4995 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 4996 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 4997 MatScalar *aa = a->a; 4998 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 4999 Mat B = aij->B; 5000 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5001 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n; 5002 MatScalar *ba = b->a; 5003 5004 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5005 PetscInt nonew = a->nonew; 5006 MatScalar *ap1,*ap2; 5007 5008 PetscFunctionBegin; 5009 for (i=0; i<m; i++) { 5010 if (im[i] < 0) continue; 5011 #if defined(PETSC_USE_DEBUG) 5012 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 5013 #endif 5014 if (im[i] >= rstart && im[i] < rend) { 5015 row = im[i] - rstart; 5016 lastcol1 = -1; 5017 rp1 = aj + ai[row]; 5018 ap1 = aa + ai[row]; 5019 rmax1 = aimax[row]; 5020 nrow1 = ailen[row]; 5021 low1 = 0; 5022 high1 = nrow1; 5023 lastcol2 = -1; 5024 rp2 = bj + bi[row]; 5025 ap2 = ba + bi[row]; 5026 rmax2 = bimax[row]; 5027 nrow2 = bilen[row]; 5028 low2 = 0; 5029 high2 = nrow2; 5030 5031 for (j=0; j<n; j++) { 5032 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5033 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5034 if (in[j] >= cstart && in[j] < cend){ 5035 col = in[j] - cstart; 5036 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5037 } else if (in[j] < 0) continue; 5038 #if defined(PETSC_USE_DEBUG) 5039 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);} 5040 #endif 5041 else { 5042 if (mat->was_assembled) { 5043 if (!aij->colmap) { 5044 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5045 } 5046 #if defined (PETSC_USE_CTABLE) 5047 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5048 col--; 5049 #else 5050 col = aij->colmap[in[j]] - 1; 5051 #endif 5052 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5053 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5054 col = in[j]; 5055 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5056 B = aij->B; 5057 b = (Mat_SeqAIJ*)B->data; 5058 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5059 rp2 = bj + bi[row]; 5060 ap2 = ba + bi[row]; 5061 rmax2 = bimax[row]; 5062 nrow2 = bilen[row]; 5063 low2 = 0; 5064 high2 = nrow2; 5065 bm = aij->B->rmap.n; 5066 ba = b->a; 5067 } 5068 } else col = in[j]; 5069 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5070 } 5071 } 5072 } else { 5073 if (!aij->donotstash) { 5074 if (roworiented) { 5075 if (ignorezeroentries && v[i*n] == 0.0) continue; 5076 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 5077 } else { 5078 if (ignorezeroentries && v[i] == 0.0) continue; 5079 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 5080 } 5081 } 5082 } 5083 }} 5084 PetscFunctionReturnVoid(); 5085 } 5086 EXTERN_C_END 5087 5088