#define PETSCMAT_DLL

#include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "src/inline/spops.h"

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
  Distributes a SeqAIJ matrix across a set of processes. Code stolen from
  MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

  Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscTruth     aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number of diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number of diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap.n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap.n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap.n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the
  local storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each process
  has an order N integer array) but it is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap.n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}


#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i]  = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i]  = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
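  /* Added comment: this routine assumes v[] holds one entire locally owned row with entries
     ordered by increasing global column index, so the values left of the diagonal block go
     into B, the diagonal block into A, and the remainder back into B; see the three
     PetscMemcpy() calls below. */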
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
  PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some variables required in the macro */
  Mat            A = aij->A;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar      *aa = a->a;
  PetscTruth     ignorezeroentries = a->ignorezeroentries;
  Mat            B = aij->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
  MatScalar      *ba = b->a;

  PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt       nonew = a->nonew;
  MatScalar      *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
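              /* added comment (assumption): after disassembly the off-diagonal block B is
                 stored with global column indices, so the global index in[j] is used
                 directly below instead of a colmap lookup */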
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap.n;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
  PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ*)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap.range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);
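  /* added comment: lrows[] now holds the local indices (relative to this process's rstart)
     of every row this process must zero, whether the request originated locally or remotely */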

  /* actually zap the local rows */
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

     Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap.n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
  }
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added into yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
  IS             Me, Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*)Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec)  {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
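    /* added comment: process 0 writes the file header (cookie, global dimensions, and the
       total nonzero count reduced from all processes below) */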
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->rmap.N;
    header[2] = mat->cmap.N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap.n;
    range = mat->rmap.range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap.n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap.n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* the 0th processor needs space as large as the largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap.n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap.n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt**)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    if (mat->rmap.N > 1024) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead");

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m    = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap.rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap.rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1;

  PetscFunctionBegin;
  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       first,local_size,nrows,*rows;
  int            ntids;
  IS             crowp,growp,irowp,lrowp,lcolp,icolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
  }
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
  ierr = ISSetPermutation(growp);CHKERRQ(ierr);
  if (pcomm!=comm) {
    ierr = ISDestroy(crowp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  /* get the local target indices */
  ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISDestroy(irowp);CHKERRQ(ierr);
  /* the column permutation is so much easier;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&ntids);CHKERRQ(ierr);
  if (ntids==1) {
    lcolp = colp;
  } else {
    ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  if (ntids>1) {
    ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISDestroy(lcolp);CHKERRQ(ierr);
  }
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  /* clean up */
  ierr = ISDestroy(lrowp);CHKERRQ(ierr);
  ierr = ISDestroy(icolp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->rmap.N;
  info->columns_global    = (double)matin->cmap.N;
  info->rows_local        = (double)matin->rmap.n;
  info->columns_local     = (double)matin->cmap.N;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
       allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap.n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap.rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
sqrt(*norm); 1537 } else if (type == NORM_1) { /* max column norm */ 1538 PetscReal *tmp,*tmp2; 1539 PetscInt *jj,*garray = aij->garray; 1540 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1541 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1542 ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 1543 *norm = 0.0; 1544 v = amat->a; jj = amat->j; 1545 for (j=0; j<amat->nz; j++) { 1546 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1547 } 1548 v = bmat->a; jj = bmat->j; 1549 for (j=0; j<bmat->nz; j++) { 1550 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1551 } 1552 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1553 for (j=0; j<mat->cmap.N; j++) { 1554 if (tmp2[j] > *norm) *norm = tmp2[j]; 1555 } 1556 ierr = PetscFree(tmp);CHKERRQ(ierr); 1557 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1558 } else if (type == NORM_INFINITY) { /* max row norm */ 1559 PetscReal ntemp = 0.0; 1560 for (j=0; j<aij->A->rmap.n; j++) { 1561 v = amat->a + amat->i[j]; 1562 sum = 0.0; 1563 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1564 sum += PetscAbsScalar(*v); v++; 1565 } 1566 v = bmat->a + bmat->i[j]; 1567 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1568 sum += PetscAbsScalar(*v); v++; 1569 } 1570 if (sum > ntemp) ntemp = sum; 1571 } 1572 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1573 } else { 1574 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1575 } 1576 } 1577 PetscFunctionReturn(0); 1578 } 1579 1580 #undef __FUNCT__ 1581 #define __FUNCT__ "MatTranspose_MPIAIJ" 1582 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1583 { 1584 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1585 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1586 PetscErrorCode ierr; 1587 PetscInt M = A->rmap.N,N = A->cmap.N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1588 PetscInt cstart=A->cmap.rstart,ncol; 1589 Mat B; 1590 MatScalar *array; 1591 1592 PetscFunctionBegin; 1593 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1594 1595 ma = A->rmap.n; na = A->cmap.n; mb = a->B->rmap.n; 1596 ai = Aloc->i; aj = Aloc->j; 1597 bi = Bloc->i; bj = Bloc->j; 1598 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1599 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1600 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1601 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1602 for (i=0; i<ai[ma]; i++){ 1603 d_nnz[aj[i]] ++; 1604 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1605 } 1606 1607 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1608 ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr); 1609 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1610 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1611 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1612 } else { 1613 B = *matout; 1614 } 1615 1616 /* copy over the A part */ 1617 array = Aloc->a; 1618 row = A->rmap.rstart; 1619 for (i=0; i<ma; i++) { 1620 ncol = ai[i+1]-ai[i]; 1621 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1622 row++; array += ncol; aj += ncol; 1623 } 1624 aj = Aloc->j; 1625 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1626 1627 /* copy over the B part */ 1628 ierr = 
PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1629 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1630 array = Bloc->a; 1631 row = A->rmap.rstart; 1632 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1633 cols_tmp = cols; 1634 for (i=0; i<mb; i++) { 1635 ncol = bi[i+1]-bi[i]; 1636 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1637 row++; array += ncol; cols_tmp += ncol; 1638 } 1639 ierr = PetscFree(cols);CHKERRQ(ierr); 1640 1641 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1642 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1643 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1644 *matout = B; 1645 } else { 1646 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1647 } 1648 PetscFunctionReturn(0); 1649 } 1650 1651 #undef __FUNCT__ 1652 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1653 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1654 { 1655 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1656 Mat a = aij->A,b = aij->B; 1657 PetscErrorCode ierr; 1658 PetscInt s1,s2,s3; 1659 1660 PetscFunctionBegin; 1661 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1662 if (rr) { 1663 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1664 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1665 /* Overlap communication with computation. */ 1666 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1667 } 1668 if (ll) { 1669 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1670 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1671 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1672 } 1673 /* scale the diagonal block */ 1674 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1675 1676 if (rr) { 1677 /* Do a scatter end and then right scale the off-diagonal block */ 1678 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1679 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1680 } 1681 1682 PetscFunctionReturn(0); 1683 } 1684 1685 #undef __FUNCT__ 1686 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1687 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1688 { 1689 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1690 PetscErrorCode ierr; 1691 1692 PetscFunctionBegin; 1693 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1694 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1695 PetscFunctionReturn(0); 1696 } 1697 #undef __FUNCT__ 1698 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1699 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1700 { 1701 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1702 PetscErrorCode ierr; 1703 1704 PetscFunctionBegin; 1705 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1706 PetscFunctionReturn(0); 1707 } 1708 1709 #undef __FUNCT__ 1710 #define __FUNCT__ "MatEqual_MPIAIJ" 1711 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1712 { 1713 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1714 Mat a,b,c,d; 1715 PetscTruth flg; 1716 PetscErrorCode ierr; 1717 1718 PetscFunctionBegin; 1719 a = matA->A; b = matA->B; 1720 c = matB->A; d = matB->B; 1721 1722 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1723 if (flg) { 1724 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1725 } 1726 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1727 PetscFunctionReturn(0); 1728 } 1729 1730 #undef __FUNCT__ 1731 #define __FUNCT__ "MatCopy_MPIAIJ" 1732 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1733 { 
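  /*
     Illustrative usage sketch (not part of this routine), assuming X and Y are
     assembled MPIAIJ matrices with the same parallel layout:

        ierr = MatCopy(X,Y,SAME_NONZERO_PATTERN);CHKERRQ(ierr);

     With SAME_NONZERO_PATTERN and matching copy implementations the two stored
     SeqAIJ blocks are copied directly below; any other combination falls back to
     the general MatCopy_Basic() path.
  */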
1734 PetscErrorCode ierr; 1735 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1736 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1737 1738 PetscFunctionBegin; 1739 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1740 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1741 /* because of the column compression in the off-processor part of the matrix a->B, 1742 the number of columns in a->B and b->B may be different, hence we cannot call 1743 the MatCopy() directly on the two parts. If need be, we can provide a more 1744 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1745 then copying the submatrices */ 1746 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1747 } else { 1748 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1749 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1750 } 1751 PetscFunctionReturn(0); 1752 } 1753 1754 #undef __FUNCT__ 1755 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1756 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1757 { 1758 PetscErrorCode ierr; 1759 1760 PetscFunctionBegin; 1761 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1762 PetscFunctionReturn(0); 1763 } 1764 1765 #include "petscblaslapack.h" 1766 #undef __FUNCT__ 1767 #define __FUNCT__ "MatAXPY_MPIAIJ" 1768 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1769 { 1770 PetscErrorCode ierr; 1771 PetscInt i; 1772 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1773 PetscBLASInt bnz,one=1; 1774 Mat_SeqAIJ *x,*y; 1775 1776 PetscFunctionBegin; 1777 if (str == SAME_NONZERO_PATTERN) { 1778 PetscScalar alpha = a; 1779 x = (Mat_SeqAIJ *)xx->A->data; 1780 y = (Mat_SeqAIJ *)yy->A->data; 1781 bnz = PetscBLASIntCast(x->nz); 1782 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1783 x = (Mat_SeqAIJ *)xx->B->data; 1784 y = (Mat_SeqAIJ *)yy->B->data; 1785 bnz = PetscBLASIntCast(x->nz); 1786 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1787 } else if (str == SUBSET_NONZERO_PATTERN) { 1788 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1789 1790 x = (Mat_SeqAIJ *)xx->B->data; 1791 y = (Mat_SeqAIJ *)yy->B->data; 1792 if (y->xtoy && y->XtoY != xx->B) { 1793 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1794 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1795 } 1796 if (!y->xtoy) { /* get xtoy */ 1797 ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1798 y->XtoY = xx->B; 1799 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1800 } 1801 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1802 } else { 1803 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1804 } 1805 PetscFunctionReturn(0); 1806 } 1807 1808 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1809 1810 #undef __FUNCT__ 1811 #define __FUNCT__ "MatConjugate_MPIAIJ" 1812 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1813 { 1814 #if defined(PETSC_USE_COMPLEX) 1815 PetscErrorCode ierr; 1816 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1817 1818 PetscFunctionBegin; 1819 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1820 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1821 #else 1822 PetscFunctionBegin; 1823 #endif 1824 PetscFunctionReturn(0); 1825 } 1826 1827 #undef __FUNCT__ 1828 #define __FUNCT__ "MatRealPart_MPIAIJ" 1829 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1830 { 1831 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1832 PetscErrorCode ierr; 1833 1834 PetscFunctionBegin; 1835 ierr = 
MatRealPart(a->A);CHKERRQ(ierr); 1836 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1837 PetscFunctionReturn(0); 1838 } 1839 1840 #undef __FUNCT__ 1841 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1842 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1843 { 1844 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1845 PetscErrorCode ierr; 1846 1847 PetscFunctionBegin; 1848 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1849 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1850 PetscFunctionReturn(0); 1851 } 1852 1853 #ifdef PETSC_HAVE_PBGL 1854 1855 #include <boost/parallel/mpi/bsp_process_group.hpp> 1856 #include <boost/graph/distributed/ilu_default_graph.hpp> 1857 #include <boost/graph/distributed/ilu_0_block.hpp> 1858 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1859 #include <boost/graph/distributed/petsc/interface.hpp> 1860 #include <boost/multi_array.hpp> 1861 #include <boost/parallel/distributed_property_map.hpp> 1862 1863 #undef __FUNCT__ 1864 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1865 /* 1866 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1867 */ 1868 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact) 1869 { 1870 namespace petsc = boost::distributed::petsc; 1871 1872 namespace graph_dist = boost::graph::distributed; 1873 using boost::graph::distributed::ilu_default::process_group_type; 1874 using boost::graph::ilu_permuted; 1875 1876 PetscTruth row_identity, col_identity; 1877 PetscContainer c; 1878 PetscInt m, n, M, N; 1879 PetscErrorCode ierr; 1880 1881 PetscFunctionBegin; 1882 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1883 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1884 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1885 if (!row_identity || !col_identity) { 1886 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1887 } 1888 1889 process_group_type pg; 1890 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1891 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1892 lgraph_type& level_graph = *lgraph_p; 1893 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1894 1895 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1896 ilu_permuted(level_graph); 1897 1898 /* put together the new matrix */ 1899 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 1900 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1901 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1902 ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr); 1903 ierr = MatSetType(*fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 1904 ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1905 ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1906 (*fact)->factor = FACTOR_LU; 1907 1908 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 1909 ierr = PetscContainerSetPointer(c, lgraph_p); 1910 ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c); 1911 PetscFunctionReturn(0); 1912 } 1913 1914 #undef __FUNCT__ 1915 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1916 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B) 1917 { 1918 PetscFunctionBegin; 1919 PetscFunctionReturn(0); 1920 } 1921 1922 #undef __FUNCT__ 1923 #define __FUNCT__ "MatSolve_MPIAIJ" 1924 /* 1925 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1926 */ 1927 
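/*
   Note: this solver is only compiled when PETSc is configured with the Parallel Boost
   Graph Library (PETSC_HAVE_PBGL). It is installed in the MatOps table further below,
   so it is reached through the usual MatSolve() interface on the factored matrix
   produced by MatILUFactorSymbolic_MPIAIJ() above.
*/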
PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1928 { 1929 namespace graph_dist = boost::graph::distributed; 1930 1931 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1932 lgraph_type* lgraph_p; 1933 PetscContainer c; 1934 PetscErrorCode ierr; 1935 1936 PetscFunctionBegin; 1937 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1938 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1939 ierr = VecCopy(b, x); CHKERRQ(ierr); 1940 1941 PetscScalar* array_x; 1942 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1943 PetscInt sx; 1944 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 1945 1946 PetscScalar* array_b; 1947 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 1948 PetscInt sb; 1949 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 1950 1951 lgraph_type& level_graph = *lgraph_p; 1952 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1953 1954 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 1955 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 1956 ref_x(array_x, boost::extents[num_vertices(graph)]); 1957 1958 typedef boost::iterator_property_map<array_ref_type::iterator, 1959 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 1960 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 1961 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 1962 1963 ilu_set_solve(*lgraph_p, vector_b, vector_x); 1964 1965 PetscFunctionReturn(0); 1966 } 1967 #endif 1968 1969 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 1970 PetscInt nzlocal,nsends,nrecvs; 1971 PetscMPIInt *send_rank; 1972 PetscInt *sbuf_nz,*sbuf_j,**rbuf_j; 1973 PetscScalar *sbuf_a,**rbuf_a; 1974 PetscErrorCode (*MatDestroy)(Mat); 1975 } Mat_Redundant; 1976 1977 #undef __FUNCT__ 1978 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 1979 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 1980 { 1981 PetscErrorCode ierr; 1982 Mat_Redundant *redund=(Mat_Redundant*)ptr; 1983 PetscInt i; 1984 1985 PetscFunctionBegin; 1986 ierr = PetscFree(redund->send_rank);CHKERRQ(ierr); 1987 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 1988 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 1989 for (i=0; i<redund->nrecvs; i++){ 1990 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 1991 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 1992 } 1993 ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 1994 ierr = PetscFree(redund);CHKERRQ(ierr); 1995 PetscFunctionReturn(0); 1996 } 1997 1998 #undef __FUNCT__ 1999 #define __FUNCT__ "MatDestroy_MatRedundant" 2000 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2001 { 2002 PetscErrorCode ierr; 2003 PetscContainer container; 2004 Mat_Redundant *redund=PETSC_NULL; 2005 2006 PetscFunctionBegin; 2007 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2008 if (container) { 2009 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2010 } else { 2011 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2012 } 2013 A->ops->destroy = redund->MatDestroy; 2014 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2015 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2016 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 2017 PetscFunctionReturn(0); 2018 } 2019 2020 #undef __FUNCT__ 2021 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2022 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat 
mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
{
  PetscMPIInt    rank,size;
  MPI_Comm       comm=((PetscObject)mat)->comm;
  PetscErrorCode ierr;
  PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
  PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
  PetscInt       *rowrange=mat->rmap.range;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            A=aij->A,B=aij->B,C=*matredundant;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
  PetscScalar    *sbuf_a;
  PetscInt       nzlocal=a->nz+b->nz;
  PetscInt       j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
  PetscInt       rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N;
  PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
  MatScalar      *aworkA,*aworkB;
  PetscScalar    *vals;
  PetscMPIInt    tag1,tag2,tag3,imdex;
  MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
                 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
  MPI_Status     recv_status,*send_status;
  PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
  PetscInt       **rbuf_j=PETSC_NULL;
  PetscScalar    **rbuf_a=PETSC_NULL;
  Mat_Redundant  *redund=PETSC_NULL;
  PetscContainer container;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
    ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
    ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
    if (container) {
      ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
    }
    if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix.
Wrong nzlocal"); 2066 2067 nsends = redund->nsends; 2068 nrecvs = redund->nrecvs; 2069 send_rank = redund->send_rank; recv_rank = send_rank + size; 2070 sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends; 2071 sbuf_j = redund->sbuf_j; 2072 sbuf_a = redund->sbuf_a; 2073 rbuf_j = redund->rbuf_j; 2074 rbuf_a = redund->rbuf_a; 2075 } 2076 2077 if (reuse == MAT_INITIAL_MATRIX){ 2078 PetscMPIInt subrank,subsize; 2079 PetscInt nleftover,np_subcomm; 2080 /* get the destination processors' id send_rank, nsends and nrecvs */ 2081 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2082 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2083 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 2084 recv_rank = send_rank + size; 2085 np_subcomm = size/nsubcomm; 2086 nleftover = size - nsubcomm*np_subcomm; 2087 nsends = 0; nrecvs = 0; 2088 for (i=0; i<size; i++){ /* i=rank*/ 2089 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2090 send_rank[nsends] = i; nsends++; 2091 recv_rank[nrecvs++] = i; 2092 } 2093 } 2094 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2095 i = size-nleftover-1; 2096 j = 0; 2097 while (j < nsubcomm - nleftover){ 2098 send_rank[nsends++] = i; 2099 i--; j++; 2100 } 2101 } 2102 2103 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2104 for (i=0; i<nleftover; i++){ 2105 recv_rank[nrecvs++] = size-nleftover+i; 2106 } 2107 } 2108 2109 /* allocate sbuf_j, sbuf_a */ 2110 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2111 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2112 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2113 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2114 2115 /* copy mat's local entries into the buffers */ 2116 if (reuse == MAT_INITIAL_MATRIX){ 2117 rownz_max = 0; 2118 rptr = sbuf_j; 2119 cols = sbuf_j + rend-rstart + 1; 2120 vals = sbuf_a; 2121 rptr[0] = 0; 2122 for (i=0; i<rend-rstart; i++){ 2123 row = i + rstart; 2124 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2125 ncols = nzA + nzB; 2126 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2127 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2128 /* load the column indices for this row into cols */ 2129 lwrite = 0; 2130 for (l=0; l<nzB; l++) { 2131 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2132 vals[lwrite] = aworkB[l]; 2133 cols[lwrite++] = ctmp; 2134 } 2135 } 2136 for (l=0; l<nzA; l++){ 2137 vals[lwrite] = aworkA[l]; 2138 cols[lwrite++] = cstart + cworkA[l]; 2139 } 2140 for (l=0; l<nzB; l++) { 2141 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2142 vals[lwrite] = aworkB[l]; 2143 cols[lwrite++] = ctmp; 2144 } 2145 } 2146 vals += ncols; 2147 cols += ncols; 2148 rptr[i+1] = rptr[i] + ncols; 2149 if (rownz_max < ncols) rownz_max = ncols; 2150 } 2151 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2152 } else { /* only copy matrix values into sbuf_a */ 2153 rptr = sbuf_j; 2154 vals = sbuf_a; 2155 rptr[0] = 0; 2156 for (i=0; i<rend-rstart; i++){ 2157 row = i + rstart; 2158 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2159 ncols = nzA + nzB; 2160 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2161 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2162 lwrite = 0; 2163 for (l=0; l<nzB; l++) { 2164 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2165 } 2166 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2167 for (l=0; l<nzB; l++) { 
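        /* second sweep over the off-diagonal block: pick up entries whose global
           column is >= cend, so that together with the < cstart sweep above and the
           diagonal block in between, each row is written in increasing global column
           order, matching the layout built in the MAT_INITIAL_MATRIX branch */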
2168 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2169 } 2170 vals += ncols; 2171 rptr[i+1] = rptr[i] + ncols; 2172 } 2173 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2174 2175 /* send nzlocal to others, and recv other's nzlocal */ 2176 /*--------------------------------------------------*/ 2177 if (reuse == MAT_INITIAL_MATRIX){ 2178 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2179 s_waits2 = s_waits3 + nsends; 2180 s_waits1 = s_waits2 + nsends; 2181 r_waits1 = s_waits1 + nsends; 2182 r_waits2 = r_waits1 + nrecvs; 2183 r_waits3 = r_waits2 + nrecvs; 2184 } else { 2185 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2186 r_waits3 = s_waits3 + nsends; 2187 } 2188 2189 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2190 if (reuse == MAT_INITIAL_MATRIX){ 2191 /* get new tags to keep the communication clean */ 2192 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2193 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2194 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2195 rbuf_nz = sbuf_nz + nsends; 2196 2197 /* post receives of other's nzlocal */ 2198 for (i=0; i<nrecvs; i++){ 2199 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2200 } 2201 /* send nzlocal to others */ 2202 for (i=0; i<nsends; i++){ 2203 sbuf_nz[i] = nzlocal; 2204 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2205 } 2206 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2207 count = nrecvs; 2208 while (count) { 2209 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2210 recv_rank[imdex] = recv_status.MPI_SOURCE; 2211 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2212 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2213 2214 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2215 rbuf_nz[imdex] += i + 2; 2216 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2217 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2218 count--; 2219 } 2220 /* wait on sends of nzlocal */ 2221 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2222 /* send mat->i,j to others, and recv from other's */ 2223 /*------------------------------------------------*/ 2224 for (i=0; i<nsends; i++){ 2225 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2226 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2227 } 2228 /* wait on receives of mat->i,j */ 2229 /*------------------------------*/ 2230 count = nrecvs; 2231 while (count) { 2232 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2233 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2234 count--; 2235 } 2236 /* wait on sends of mat->i,j */ 2237 /*---------------------------*/ 2238 if (nsends) { 2239 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2240 } 2241 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2242 2243 /* post receives, send and receive mat->a */ 2244 /*----------------------------------------*/ 
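  /*
     The value exchange mirrors the index exchange above: the receives are posted
     first, every destination listed in send_rank[] is sent the same sbuf_a buffer,
     and MPI_Waitany() drains completions in arrival order, with recv_rank[]
     (recorded during the first round) used to cross-check MPI_SOURCE.
  */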
2245 for (imdex=0; imdex<nrecvs; imdex++) { 2246 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2247 } 2248 for (i=0; i<nsends; i++){ 2249 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2250 } 2251 count = nrecvs; 2252 while (count) { 2253 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2254 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2255 count--; 2256 } 2257 if (nsends) { 2258 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2259 } 2260 2261 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2262 2263 /* create redundant matrix */ 2264 /*-------------------------*/ 2265 if (reuse == MAT_INITIAL_MATRIX){ 2266 /* compute rownz_max for preallocation */ 2267 for (imdex=0; imdex<nrecvs; imdex++){ 2268 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2269 rptr = rbuf_j[imdex]; 2270 for (i=0; i<j; i++){ 2271 ncols = rptr[i+1] - rptr[i]; 2272 if (rownz_max < ncols) rownz_max = ncols; 2273 } 2274 } 2275 2276 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2277 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2278 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2279 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2280 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2281 } else { 2282 C = *matredundant; 2283 } 2284 2285 /* insert local matrix entries */ 2286 rptr = sbuf_j; 2287 cols = sbuf_j + rend-rstart + 1; 2288 vals = sbuf_a; 2289 for (i=0; i<rend-rstart; i++){ 2290 row = i + rstart; 2291 ncols = rptr[i+1] - rptr[i]; 2292 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2293 vals += ncols; 2294 cols += ncols; 2295 } 2296 /* insert received matrix entries */ 2297 for (imdex=0; imdex<nrecvs; imdex++){ 2298 rstart = rowrange[recv_rank[imdex]]; 2299 rend = rowrange[recv_rank[imdex]+1]; 2300 rptr = rbuf_j[imdex]; 2301 cols = rbuf_j[imdex] + rend-rstart + 1; 2302 vals = rbuf_a[imdex]; 2303 for (i=0; i<rend-rstart; i++){ 2304 row = i + rstart; 2305 ncols = rptr[i+1] - rptr[i]; 2306 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2307 vals += ncols; 2308 cols += ncols; 2309 } 2310 } 2311 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2312 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2313 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2314 if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N); 2315 if (reuse == MAT_INITIAL_MATRIX){ 2316 PetscContainer container; 2317 *matredundant = C; 2318 /* create a supporting struct and attach it to C for reuse */ 2319 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2320 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2321 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2322 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2323 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2324 2325 redund->nzlocal = nzlocal; 2326 redund->nsends = nsends; 2327 redund->nrecvs = nrecvs; 2328 redund->send_rank = send_rank; 2329 redund->sbuf_nz = sbuf_nz; 2330 redund->sbuf_j = sbuf_j; 2331 redund->sbuf_a = sbuf_a; 2332 redund->rbuf_j = 
rbuf_j; 2333 redund->rbuf_a = rbuf_a; 2334 2335 redund->MatDestroy = C->ops->destroy; 2336 C->ops->destroy = MatDestroy_MatRedundant; 2337 } 2338 PetscFunctionReturn(0); 2339 } 2340 2341 #undef __FUNCT__ 2342 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2343 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2344 { 2345 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2346 PetscInt n = A->rmap.n; 2347 PetscInt cstart = A->cmap.rstart; 2348 PetscInt *cmap = mat->garray; 2349 PetscInt *diagIdx, *offdiagIdx; 2350 Vec diagV, offdiagV; 2351 PetscScalar *a, *diagA, *offdiagA; 2352 PetscInt r; 2353 PetscErrorCode ierr; 2354 2355 PetscFunctionBegin; 2356 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2357 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2358 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2359 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2360 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2361 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2362 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2363 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2364 for(r = 0; r < n; ++r) { 2365 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2366 a[r] = diagA[r]; 2367 idx[r] = cstart + diagIdx[r]; 2368 } else { 2369 a[r] = offdiagA[r]; 2370 idx[r] = cmap[offdiagIdx[r]]; 2371 } 2372 } 2373 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2374 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2375 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2376 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2377 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2378 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2379 PetscFunctionReturn(0); 2380 } 2381 2382 #undef __FUNCT__ 2383 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2384 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[]) 2385 { 2386 PetscErrorCode ierr; 2387 2388 PetscFunctionBegin; 2389 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 2390 PetscFunctionReturn(0); 2391 } 2392 2393 /* -------------------------------------------------------------------*/ 2394 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2395 MatGetRow_MPIAIJ, 2396 MatRestoreRow_MPIAIJ, 2397 MatMult_MPIAIJ, 2398 /* 4*/ MatMultAdd_MPIAIJ, 2399 MatMultTranspose_MPIAIJ, 2400 MatMultTransposeAdd_MPIAIJ, 2401 #ifdef PETSC_HAVE_PBGL 2402 MatSolve_MPIAIJ, 2403 #else 2404 0, 2405 #endif 2406 0, 2407 0, 2408 /*10*/ 0, 2409 0, 2410 0, 2411 MatRelax_MPIAIJ, 2412 MatTranspose_MPIAIJ, 2413 /*15*/ MatGetInfo_MPIAIJ, 2414 MatEqual_MPIAIJ, 2415 MatGetDiagonal_MPIAIJ, 2416 MatDiagonalScale_MPIAIJ, 2417 MatNorm_MPIAIJ, 2418 /*20*/ MatAssemblyBegin_MPIAIJ, 2419 MatAssemblyEnd_MPIAIJ, 2420 0, 2421 MatSetOption_MPIAIJ, 2422 MatZeroEntries_MPIAIJ, 2423 /*25*/ MatZeroRows_MPIAIJ, 2424 0, 2425 #ifdef PETSC_HAVE_PBGL 2426 MatLUFactorNumeric_MPIAIJ, 2427 #else 2428 0, 2429 #endif 2430 0, 2431 0, 2432 /*30*/ MatSetUpPreallocation_MPIAIJ, 2433 #ifdef PETSC_HAVE_PBGL 2434 MatILUFactorSymbolic_MPIAIJ, 2435 #else 2436 0, 2437 #endif 2438 0, 2439 0, 2440 0, 2441 /*35*/ MatDuplicate_MPIAIJ, 2442 0, 2443 0, 2444 0, 2445 0, 2446 /*40*/ MatAXPY_MPIAIJ, 2447 MatGetSubMatrices_MPIAIJ, 2448 MatIncreaseOverlap_MPIAIJ, 2449 MatGetValues_MPIAIJ, 2450 MatCopy_MPIAIJ, 2451 /*45*/ 0, 2452 MatScale_MPIAIJ, 2453 0, 2454 0, 2455 0, 2456 /*50*/ MatSetBlockSize_MPIAIJ, 2457 0, 2458 0, 2459 0, 2460 0, 2461 /*55*/ 
MatFDColoringCreate_MPIAIJ, 2462 0, 2463 MatSetUnfactored_MPIAIJ, 2464 MatPermute_MPIAIJ, 2465 0, 2466 /*60*/ MatGetSubMatrix_MPIAIJ, 2467 MatDestroy_MPIAIJ, 2468 MatView_MPIAIJ, 2469 0, 2470 0, 2471 /*65*/ 0, 2472 0, 2473 0, 2474 0, 2475 0, 2476 /*70*/ 0, 2477 0, 2478 MatSetColoring_MPIAIJ, 2479 #if defined(PETSC_HAVE_ADIC) 2480 MatSetValuesAdic_MPIAIJ, 2481 #else 2482 0, 2483 #endif 2484 MatSetValuesAdifor_MPIAIJ, 2485 /*75*/ 0, 2486 0, 2487 0, 2488 0, 2489 0, 2490 /*80*/ 0, 2491 0, 2492 0, 2493 0, 2494 /*84*/ MatLoad_MPIAIJ, 2495 0, 2496 0, 2497 0, 2498 0, 2499 0, 2500 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 2501 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2502 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2503 MatPtAP_Basic, 2504 MatPtAPSymbolic_MPIAIJ, 2505 /*95*/ MatPtAPNumeric_MPIAIJ, 2506 0, 2507 0, 2508 0, 2509 0, 2510 /*100*/0, 2511 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2512 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2513 MatConjugate_MPIAIJ, 2514 0, 2515 /*105*/MatSetValuesRow_MPIAIJ, 2516 MatRealPart_MPIAIJ, 2517 MatImaginaryPart_MPIAIJ, 2518 0, 2519 0, 2520 /*110*/0, 2521 MatGetRedundantMatrix_MPIAIJ, 2522 MatGetRowMin_MPIAIJ, 2523 0, 2524 0, 2525 /*115*/MatGetSeqNonzerostructure_MPIAIJ}; 2526 2527 /* ----------------------------------------------------------------------------------------*/ 2528 2529 EXTERN_C_BEGIN 2530 #undef __FUNCT__ 2531 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2532 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2533 { 2534 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2535 PetscErrorCode ierr; 2536 2537 PetscFunctionBegin; 2538 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2539 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2540 PetscFunctionReturn(0); 2541 } 2542 EXTERN_C_END 2543 2544 EXTERN_C_BEGIN 2545 #undef __FUNCT__ 2546 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2547 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2548 { 2549 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2550 PetscErrorCode ierr; 2551 2552 PetscFunctionBegin; 2553 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2554 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2555 PetscFunctionReturn(0); 2556 } 2557 EXTERN_C_END 2558 2559 #include "petscpc.h" 2560 EXTERN_C_BEGIN 2561 #undef __FUNCT__ 2562 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2563 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2564 { 2565 Mat_MPIAIJ *b; 2566 PetscErrorCode ierr; 2567 PetscInt i; 2568 2569 PetscFunctionBegin; 2570 B->preallocated = PETSC_TRUE; 2571 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2572 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2573 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2574 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2575 2576 B->rmap.bs = B->cmap.bs = 1; 2577 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2578 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2579 if (d_nnz) { 2580 for (i=0; i<B->rmap.n; i++) { 2581 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2582 } 2583 } 2584 if (o_nnz) { 2585 for (i=0; i<B->rmap.n; i++) { 2586 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2587 } 2588 } 2589 b = (Mat_MPIAIJ*)B->data; 2590 2591 /* Explicitly create 2 MATSEQAIJ matrices. 
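     b->A is the square "diagonal" block (local rows by local columns); b->B is
     created with the full global column width and is compressed during assembly so
     that it keeps only the columns that actually occur off-process, with garray
     mapping its local column numbers back to global indices.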
*/ 2592 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2593 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 2594 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2595 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2596 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2597 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 2598 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2599 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2600 2601 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2602 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2603 2604 PetscFunctionReturn(0); 2605 } 2606 EXTERN_C_END 2607 2608 #undef __FUNCT__ 2609 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2610 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2611 { 2612 Mat mat; 2613 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2614 PetscErrorCode ierr; 2615 2616 PetscFunctionBegin; 2617 *newmat = 0; 2618 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2619 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 2620 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2621 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2622 a = (Mat_MPIAIJ*)mat->data; 2623 2624 mat->factor = matin->factor; 2625 mat->rmap.bs = matin->rmap.bs; 2626 mat->assembled = PETSC_TRUE; 2627 mat->insertmode = NOT_SET_VALUES; 2628 mat->preallocated = PETSC_TRUE; 2629 2630 a->size = oldmat->size; 2631 a->rank = oldmat->rank; 2632 a->donotstash = oldmat->donotstash; 2633 a->roworiented = oldmat->roworiented; 2634 a->rowindices = 0; 2635 a->rowvalues = 0; 2636 a->getrowactive = PETSC_FALSE; 2637 2638 ierr = PetscMapCopy(((PetscObject)mat)->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 2639 ierr = PetscMapCopy(((PetscObject)mat)->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 2640 2641 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr); 2642 if (oldmat->colmap) { 2643 #if defined (PETSC_USE_CTABLE) 2644 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2645 #else 2646 ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2647 ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2648 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2649 #endif 2650 } else a->colmap = 0; 2651 if (oldmat->garray) { 2652 PetscInt len; 2653 len = oldmat->B->cmap.n; 2654 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2655 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2656 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2657 } else a->garray = 0; 2658 2659 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2660 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2661 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2662 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2663 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2664 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2665 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2666 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2667 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2668 *newmat = mat; 2669 PetscFunctionReturn(0); 2670 } 
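/*
   Illustrative driver for the loader below (a sketch, not part of this file), assuming
   the PetscViewer-based MatLoad() calling sequence of this PETSc vintage and a binary
   file "matrix.dat" previously written with MatView() on a binary viewer:

     Mat         A;
     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
     ...
     ierr = MatDestroy(A);CHKERRQ(ierr);

   Process 0 reads the header, row lengths, column indices and values and ships each
   process its slice, which is what MatLoad_MPIAIJ() below implements.
*/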
2671 2672 #include "petscsys.h" 2673 2674 #undef __FUNCT__ 2675 #define __FUNCT__ "MatLoad_MPIAIJ" 2676 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat) 2677 { 2678 Mat A; 2679 PetscScalar *vals,*svals; 2680 MPI_Comm comm = ((PetscObject)viewer)->comm; 2681 MPI_Status status; 2682 PetscErrorCode ierr; 2683 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 2684 PetscInt i,nz,j,rstart,rend,mmax; 2685 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2686 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2687 PetscInt cend,cstart,n,*rowners; 2688 int fd; 2689 2690 PetscFunctionBegin; 2691 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2692 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2693 if (!rank) { 2694 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2695 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2696 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2697 } 2698 2699 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2700 M = header[1]; N = header[2]; 2701 /* determine ownership of all rows */ 2702 m = M/size + ((M % size) > rank); 2703 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2704 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2705 2706 /* First process needs enough room for process with most rows */ 2707 if (!rank) { 2708 mmax = rowners[1]; 2709 for (i=2; i<size; i++) { 2710 mmax = PetscMax(mmax,rowners[i]); 2711 } 2712 } else mmax = m; 2713 2714 rowners[0] = 0; 2715 for (i=2; i<=size; i++) { 2716 rowners[i] += rowners[i-1]; 2717 } 2718 rstart = rowners[rank]; 2719 rend = rowners[rank+1]; 2720 2721 /* distribute row lengths to all processors */ 2722 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2723 if (!rank) { 2724 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2725 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2726 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2727 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2728 for (j=0; j<m; j++) { 2729 procsnz[0] += ourlens[j]; 2730 } 2731 for (i=1; i<size; i++) { 2732 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2733 /* calculate the number of nonzeros on each processor */ 2734 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2735 procsnz[i] += rowlengths[j]; 2736 } 2737 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2738 } 2739 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2740 } else { 2741 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2742 } 2743 2744 if (!rank) { 2745 /* determine max buffer needed and allocate it */ 2746 maxnz = 0; 2747 for (i=0; i<size; i++) { 2748 maxnz = PetscMax(maxnz,procsnz[i]); 2749 } 2750 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2751 2752 /* read in my part of the matrix column indices */ 2753 nz = procsnz[0]; 2754 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2755 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2756 2757 /* read in every one elses and ship off */ 2758 for (i=1; i<size; i++) { 2759 nz = procsnz[i]; 2760 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2761 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2762 } 2763 ierr = PetscFree(cols);CHKERRQ(ierr); 2764 } else { 2765 /* 
determine buffer space needed for message */ 2766 nz = 0; 2767 for (i=0; i<m; i++) { 2768 nz += ourlens[i]; 2769 } 2770 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2771 2772 /* receive message of column indices*/ 2773 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2774 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2775 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2776 } 2777 2778 /* determine column ownership if matrix is not square */ 2779 if (N != M) { 2780 n = N/size + ((N % size) > rank); 2781 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2782 cstart = cend - n; 2783 } else { 2784 cstart = rstart; 2785 cend = rend; 2786 n = cend - cstart; 2787 } 2788 2789 /* loop over local rows, determining number of off diagonal entries */ 2790 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2791 jj = 0; 2792 for (i=0; i<m; i++) { 2793 for (j=0; j<ourlens[i]; j++) { 2794 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2795 jj++; 2796 } 2797 } 2798 2799 /* create our matrix */ 2800 for (i=0; i<m; i++) { 2801 ourlens[i] -= offlens[i]; 2802 } 2803 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2804 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2805 ierr = MatSetType(A,type);CHKERRQ(ierr); 2806 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2807 2808 for (i=0; i<m; i++) { 2809 ourlens[i] += offlens[i]; 2810 } 2811 2812 if (!rank) { 2813 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2814 2815 /* read in my part of the matrix numerical values */ 2816 nz = procsnz[0]; 2817 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2818 2819 /* insert into matrix */ 2820 jj = rstart; 2821 smycols = mycols; 2822 svals = vals; 2823 for (i=0; i<m; i++) { 2824 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2825 smycols += ourlens[i]; 2826 svals += ourlens[i]; 2827 jj++; 2828 } 2829 2830 /* read in other processors and ship out */ 2831 for (i=1; i<size; i++) { 2832 nz = procsnz[i]; 2833 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2834 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 2835 } 2836 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2837 } else { 2838 /* receive numeric values */ 2839 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2840 2841 /* receive message of values*/ 2842 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 2843 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2844 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2845 2846 /* insert into matrix */ 2847 jj = rstart; 2848 smycols = mycols; 2849 svals = vals; 2850 for (i=0; i<m; i++) { 2851 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2852 smycols += ourlens[i]; 2853 svals += ourlens[i]; 2854 jj++; 2855 } 2856 } 2857 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2858 ierr = PetscFree(vals);CHKERRQ(ierr); 2859 ierr = PetscFree(mycols);CHKERRQ(ierr); 2860 ierr = PetscFree(rowners);CHKERRQ(ierr); 2861 2862 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2863 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2864 *newmat = A; 2865 PetscFunctionReturn(0); 2866 } 2867 2868 #undef __FUNCT__ 2869 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2870 /* 2871 Not great since it makes two copies of the 
submatrix, first an SeqAIJ 2872 in local and then by concatenating the local matrices the end result. 2873 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2874 */ 2875 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2876 { 2877 PetscErrorCode ierr; 2878 PetscMPIInt rank,size; 2879 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2880 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2881 Mat *local,M,Mreuse; 2882 MatScalar *vwork,*aa; 2883 MPI_Comm comm = ((PetscObject)mat)->comm; 2884 Mat_SeqAIJ *aij; 2885 2886 2887 PetscFunctionBegin; 2888 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2889 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2890 2891 if (call == MAT_REUSE_MATRIX) { 2892 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2893 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2894 local = &Mreuse; 2895 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2896 } else { 2897 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2898 Mreuse = *local; 2899 ierr = PetscFree(local);CHKERRQ(ierr); 2900 } 2901 2902 /* 2903 m - number of local rows 2904 n - number of columns (same on all processors) 2905 rstart - first row in new global matrix generated 2906 */ 2907 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2908 if (call == MAT_INITIAL_MATRIX) { 2909 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2910 ii = aij->i; 2911 jj = aij->j; 2912 2913 /* 2914 Determine the number of non-zeros in the diagonal and off-diagonal 2915 portions of the matrix in order to do correct preallocation 2916 */ 2917 2918 /* first get start and end of "diagonal" columns */ 2919 if (csize == PETSC_DECIDE) { 2920 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2921 if (mglobal == n) { /* square matrix */ 2922 nlocal = m; 2923 } else { 2924 nlocal = n/size + ((n % size) > rank); 2925 } 2926 } else { 2927 nlocal = csize; 2928 } 2929 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2930 rstart = rend - nlocal; 2931 if (rank == size - 1 && rend != n) { 2932 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2933 } 2934 2935 /* next, compute all the lengths */ 2936 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2937 olens = dlens + m; 2938 for (i=0; i<m; i++) { 2939 jend = ii[i+1] - ii[i]; 2940 olen = 0; 2941 dlen = 0; 2942 for (j=0; j<jend; j++) { 2943 if (*jj < rstart || *jj >= rend) olen++; 2944 else dlen++; 2945 jj++; 2946 } 2947 olens[i] = olen; 2948 dlens[i] = dlen; 2949 } 2950 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2951 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 2952 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2953 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2954 ierr = PetscFree(dlens);CHKERRQ(ierr); 2955 } else { 2956 PetscInt ml,nl; 2957 2958 M = *newmat; 2959 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2960 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2961 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2962 /* 2963 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2964 rather than the slower MatSetValues(). 
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart,cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);

  B->rmap.bs = B->cmap.bs = 1;
  ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
  m      = B->rmap.n;
  cstart = B->cmap.rstart;
  cend   = B->cmap.rend;
  rstart = B->rmap.rstart;

  ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
  o_nnz = d_nnz + m;

#if defined(PETSC_USE_DEBUG)
  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    JJ  = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
    if (nnz && (JJ[nnz-1] >= B->cmap.N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap.N);
    for (j=1; j<nnz; j++) {
      if (JJ[j] <= JJ[j-1]) SETERRQ2(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at %D location in column indices",i,j);
    }
  }
#endif

  for (i=0; i<m; i++) {
    nnz     = Ii[i+1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    for (j=0; j<nnz; j++) {
      if (*JJ >= cstart) break;
      JJ++;
    }
    d = 0;
    for (; j<nnz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree(d_nnz);CHKERRQ(ierr);

  if (v) values = (PetscScalar*)v;
  else {
    ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = Ii[i+1] - Ii[i];
    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3062 } 3063 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3064 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3065 3066 if (!v) { 3067 ierr = PetscFree(values);CHKERRQ(ierr); 3068 } 3069 PetscFunctionReturn(0); 3070 } 3071 EXTERN_C_END 3072 3073 #undef __FUNCT__ 3074 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3075 /*@ 3076 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3077 (the default parallel PETSc format). 3078 3079 Collective on MPI_Comm 3080 3081 Input Parameters: 3082 + B - the matrix 3083 . i - the indices into j for the start of each local row (starts with zero) 3084 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3085 - v - optional values in the matrix 3086 3087 Level: developer 3088 3089 Notes: 3090 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3091 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3092 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3093 3094 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3095 3096 The format which is used for the sparse matrix input, is equivalent to a 3097 row-major ordering.. i.e for the following matrix, the input data expected is 3098 as shown: 3099 3100 1 0 0 3101 2 0 3 P0 3102 ------- 3103 4 5 6 P1 3104 3105 Process0 [P0]: rows_owned=[0,1] 3106 i = {0,1,3} [size = nrow+1 = 2+1] 3107 j = {0,0,2} [size = nz = 6] 3108 v = {1,2,3} [size = nz = 6] 3109 3110 Process1 [P1]: rows_owned=[2] 3111 i = {0,3} [size = nrow+1 = 1+1] 3112 j = {0,1,2} [size = nz = 6] 3113 v = {4,5,6} [size = nz = 6] 3114 3115 The column indices for each row MUST be sorted. 3116 3117 .keywords: matrix, aij, compressed row, sparse, parallel 3118 3119 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3120 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3121 @*/ 3122 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3123 { 3124 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 3125 3126 PetscFunctionBegin; 3127 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3128 if (f) { 3129 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 3130 } 3131 PetscFunctionReturn(0); 3132 } 3133 3134 #undef __FUNCT__ 3135 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3136 /*@C 3137 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3138 (the default parallel PETSc format). For good matrix assembly performance 3139 the user should preallocate the matrix storage by setting the parameters 3140 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3141 performance can be increased by more than a factor of 50. 3142 3143 Collective on MPI_Comm 3144 3145 Input Parameters: 3146 + A - the matrix 3147 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3148 (same value is used for all local rows) 3149 . d_nnz - array containing the number of nonzeros in the various rows of the 3150 DIAGONAL portion of the local submatrix (possibly different for each row) 3151 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 
3152 The size of this array is equal to the number of local rows, i.e. 'm'. 3153 You must leave room for the diagonal entry even if it is zero. 3154 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3155 submatrix (same value is used for all local rows). 3156 - o_nnz - array containing the number of nonzeros in the various rows of the 3157 OFF-DIAGONAL portion of the local submatrix (possibly different for 3158 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3159 structure. The size of this array is equal to the number 3160 of local rows, i.e. 'm'. 3161 3162 If the *_nnz parameter is given then the *_nz parameter is ignored. 3163 3164 The AIJ format (also called the Yale sparse matrix format or 3165 compressed row storage (CSR)) is fully compatible with standard Fortran 77 3166 storage. The stored row and column indices begin with zero. See the users manual for details. 3167 3168 The parallel matrix is partitioned such that the first m0 rows belong to 3169 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3170 to process 2, etc., where m0,m1,m2,... are the input parameter 'm'. 3171 3172 The DIAGONAL portion of the local submatrix of a processor can be defined 3173 as the submatrix which is obtained by extracting the part corresponding 3174 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 3175 first row that belongs to the processor, and r2 is the last row belonging 3176 to this processor. This is a square mxm matrix. The remaining portion 3177 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 3178 3179 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 3180 3181 You can call MatGetInfo() to get information on how effective the preallocation was; 3182 for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded. 3183 You can also run with the option -info and look for messages with the string 3184 malloc in them to see if additional memory allocation was needed. 3185 3186 Example usage: 3187 3188 Consider the following 8x8 matrix with 34 non-zero values that is 3189 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3190 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 3191 as follows: 3192 3193 .vb 3194 1 2 0 | 0 3 0 | 0 4 3195 Proc0 0 5 6 | 7 0 0 | 8 0 3196 9 0 10 | 11 0 0 | 12 0 3197 ------------------------------------- 3198 13 0 14 | 15 16 17 | 0 0 3199 Proc1 0 18 0 | 19 20 21 | 0 0 3200 0 0 0 | 22 23 0 | 24 0 3201 ------------------------------------- 3202 Proc2 25 26 27 | 0 0 28 | 29 0 3203 30 0 0 | 31 32 33 | 0 34 3204 .ve 3205 3206 This can be represented as a collection of submatrices as: 3207 3208 .vb 3209 A B C 3210 D E F 3211 G H I 3212 .ve 3213 3214 where the submatrices A,B,C are owned by proc0, D,E,F are 3215 owned by proc1, and G,H,I are owned by proc2. 3216 3217 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3218 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3219 The 'M','N' parameters are 8,8, and have the same values on all procs. 3220 3221 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3222 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3223 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3224 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 3225 part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ 3226 matrix and [DF] as another SeqAIJ matrix. 
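   As a rough sketch of how this routine fits into the usual assembly sequence (comm, m, n and the two nnz arrays below are placeholders that the calling code must supply):

.vb
     Mat      A;
     PetscInt *d_nnz,*o_nnz;   /* length m; per-row counts for the DIAGONAL and OFF-DIAGONAL parts */

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
     /* ... calls to MatSetValues() ... */
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve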
3227 3228 When the d_nz, o_nz parameters are specified, d_nz storage elements are 3229 allocated for every row of the local diagonal submatrix, and o_nz 3230 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3231 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 3232 local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively. 3233 In this case, the values of d_nz,o_nz are: 3234 .vb 3235 proc0 : d_nz = 2, o_nz = 2 3236 proc1 : d_nz = 3, o_nz = 2 3237 proc2 : d_nz = 1, o_nz = 4 3238 .ve 3239 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3240 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 3241 for proc2. That is, we are using 12+15+10=37 storage locations to store 3242 34 values. 3243 3244 When the d_nnz, o_nnz parameters are specified, the storage is specified 3245 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 3246 In the above case the values for d_nnz,o_nnz are: 3247 .vb 3248 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3249 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3250 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3251 .ve 3252 Here the space allocated is the sum of all the above values, i.e. 34, and 3253 hence the preallocation is perfect. 3254 3255 Level: intermediate 3256 3257 .keywords: matrix, aij, compressed row, sparse, parallel 3258 3259 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3260 MPIAIJ, MatGetInfo() 3261 @*/ 3262 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3263 { 3264 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3265 3266 PetscFunctionBegin; 3267 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3268 if (f) { 3269 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3270 } 3271 PetscFunctionReturn(0); 3272 } 3273 3274 #undef __FUNCT__ 3275 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3276 /*@ 3277 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain 3278 the local rows in standard CSR format. 3279 3280 Collective on MPI_Comm 3281 3282 Input Parameters: 3283 + comm - MPI communicator 3284 . m - number of local rows (cannot be PETSC_DECIDE) 3285 . n - This value should be the same as the local size used in creating the 3286 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it 3287 calculated if N is given). For square matrices n is almost always m. 3288 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3289 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3290 . i - row indices 3291 . j - column indices 3292 - a - matrix values 3293 3294 Output Parameter: 3295 . mat - the matrix 3296 3297 Level: intermediate 3298 3299 Notes: 3300 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3301 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3302 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3303 3304 The i and j indices are 0-based; the entries of i are offsets into the local j array. 3305 3306 The format used for the sparse matrix input is equivalent to a 3307 row-major ordering, 
i.e for the following matrix, the input data expected is 3308 as shown: 3309 3310 1 0 0 3311 2 0 3 P0 3312 ------- 3313 4 5 6 P1 3314 3315 Process0 [P0]: rows_owned=[0,1] 3316 i = {0,1,3} [size = nrow+1 = 2+1] 3317 j = {0,0,2} [size = nz = 6] 3318 v = {1,2,3} [size = nz = 6] 3319 3320 Process1 [P1]: rows_owned=[2] 3321 i = {0,3} [size = nrow+1 = 1+1] 3322 j = {0,1,2} [size = nz = 6] 3323 v = {4,5,6} [size = nz = 6] 3324 3325 .keywords: matrix, aij, compressed row, sparse, parallel 3326 3327 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3328 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3329 @*/ 3330 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3331 { 3332 PetscErrorCode ierr; 3333 3334 PetscFunctionBegin; 3335 if (i[0]) { 3336 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3337 } 3338 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3339 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3340 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3341 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3342 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3343 PetscFunctionReturn(0); 3344 } 3345 3346 #undef __FUNCT__ 3347 #define __FUNCT__ "MatCreateMPIAIJ" 3348 /*@C 3349 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3350 (the default parallel PETSc format). For good matrix assembly performance 3351 the user should preallocate the matrix storage by setting the parameters 3352 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3353 performance can be increased by more than a factor of 50. 3354 3355 Collective on MPI_Comm 3356 3357 Input Parameters: 3358 + comm - MPI communicator 3359 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3360 This value should be the same as the local size used in creating the 3361 y vector for the matrix-vector product y = Ax. 3362 . n - This value should be the same as the local size used in creating the 3363 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3364 calculated if N is given) For square matrices n is almost always m. 3365 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3366 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3367 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3368 (same value is used for all local rows) 3369 . d_nnz - array containing the number of nonzeros in the various rows of the 3370 DIAGONAL portion of the local submatrix (possibly different for each row) 3371 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3372 The size of this array is equal to the number of local rows, i.e 'm'. 3373 You must leave room for the diagonal entry even if it is zero. 3374 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3375 submatrix (same value is used for all local rows). 3376 - o_nnz - array containing the number of nonzeros in the various rows of the 3377 OFF-DIAGONAL portion of the local submatrix (possibly different for 3378 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3379 structure. The size of this array is equal to the number 3380 of local rows, i.e 'm'. 
3381 3382 Output Parameter: 3383 . A - the matrix 3384 3385 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3386 MatXXXXSetPreallocation() paradigm instead of this routine directly. This is especially 3387 true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles. 3388 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()] 3389 3390 Notes: 3391 If the *_nnz parameter is given then the *_nz parameter is ignored. 3392 3393 The m,n,M,N parameters specify the size of the matrix, and its partitioning across 3394 processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3395 storage requirements for this matrix. 3396 3397 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3398 processor then it must be used on all processors that share the object for 3399 that argument. 3400 3401 The user MUST specify either the local or global matrix dimensions 3402 (possibly both). 3403 3404 The parallel matrix is partitioned across processors such that the 3405 first m0 rows belong to process 0, the next m1 rows belong to 3406 process 1, the next m2 rows belong to process 2, etc., where 3407 m0,m1,m2,... are the input parameter 'm'. That is, each processor stores 3408 values corresponding to an [m x N] submatrix. 3409 3410 The columns are logically partitioned with the n0 columns belonging 3411 to the 0th partition, the next n1 columns belonging to the next 3412 partition, etc., where n0,n1,n2,... are the input parameter 'n'. 3413 3414 The DIAGONAL portion of the local submatrix on any given processor 3415 is the submatrix corresponding to the rows and columns m,n 3416 corresponding to the given processor. That is, the diagonal submatrix on 3417 process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1], 3418 etc. The remaining portion of the local submatrix [m x (N-n)] 3419 constitutes the OFF-DIAGONAL portion. The example below better 3420 illustrates this concept. 3421 3422 For a square global matrix we define each processor's diagonal portion 3423 to be its local rows and the corresponding columns (a square submatrix); 3424 each processor's off-diagonal portion encompasses the remainder of the 3425 local matrix (a rectangular submatrix). 3426 3427 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 3428 3429 When calling this routine with a single process communicator, a matrix of 3430 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3431 type of communicator, use the construction mechanism: 3432 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...); 3433 3434 By default, this format uses inodes (identical nodes) when possible. 3435 We search for consecutive rows with the same nonzero structure, thereby 3436 reusing matrix information to achieve increased efficiency. 3437 3438 Options Database Keys: 3439 + -mat_no_inode - Do not use inodes 3440 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3441 - -mat_aij_oneindex - Internally use indexing starting at 1 3442 rather than 0. Note that when calling MatSetValues(), 3443 the user still MUST index entries starting at 0! 3444 3445 3446 Example usage: 3447 3448 Consider the following 8x8 matrix with 34 non-zero values that is 3449 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3450 proc1 owns 3 rows, and proc2 owns 2 rows. 
This division can be shown 3451 as follows: 3452 3453 .vb 3454 1 2 0 | 0 3 0 | 0 4 3455 Proc0 0 5 6 | 7 0 0 | 8 0 3456 9 0 10 | 11 0 0 | 12 0 3457 ------------------------------------- 3458 13 0 14 | 15 16 17 | 0 0 3459 Proc1 0 18 0 | 19 20 21 | 0 0 3460 0 0 0 | 22 23 0 | 24 0 3461 ------------------------------------- 3462 Proc2 25 26 27 | 0 0 28 | 29 0 3463 30 0 0 | 31 32 33 | 0 34 3464 .ve 3465 3466 This can be represented as a collection of submatrices as: 3467 3468 .vb 3469 A B C 3470 D E F 3471 G H I 3472 .ve 3473 3474 where the submatrices A,B,C are owned by proc0, D,E,F are 3475 owned by proc1, and G,H,I are owned by proc2. 3476 3477 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3478 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3479 The 'M','N' parameters are 8,8, and have the same values on all procs. 3480 3481 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3482 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3483 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3484 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 3485 part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ 3486 matrix and [DF] as another SeqAIJ matrix. 3487 3488 When the d_nz, o_nz parameters are specified, d_nz storage elements are 3489 allocated for every row of the local diagonal submatrix, and o_nz 3490 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3491 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 3492 local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively. 3493 In this case, the values of d_nz,o_nz are: 3494 .vb 3495 proc0 : d_nz = 2, o_nz = 2 3496 proc1 : d_nz = 3, o_nz = 2 3497 proc2 : d_nz = 1, o_nz = 4 3498 .ve 3499 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3500 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 3501 for proc2. That is, we are using 12+15+10=37 storage locations to store 3502 34 values. 3503 3504 When the d_nnz, o_nnz parameters are specified, the storage is specified 3505 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 3506 In the above case the values for d_nnz,o_nnz are: 3507 .vb 3508 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3509 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3510 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3511 .ve 3512 Here the space allocated is the sum of all the above values, i.e. 34, and 3513 hence the preallocation is perfect. 
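   As a sketch for the example above (error checking and the surrounding code omitted; PETSC_COMM_WORLD stands in for whatever communicator is actually used), the call made on proc0 with the d_nnz/o_nnz values listed would be

.vb
     PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
     Mat      A;

     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
   with proc1 and proc2 making the same call using their own local sizes and d_nnz/o_nnz arrays.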
3514 3515 Level: intermediate 3516 3517 .keywords: matrix, aij, compressed row, sparse, parallel 3518 3519 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3520 MPIAIJ, MatCreateMPIAIJWithArrays() 3521 @*/ 3522 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3523 { 3524 PetscErrorCode ierr; 3525 PetscMPIInt size; 3526 3527 PetscFunctionBegin; 3528 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3529 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3530 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3531 if (size > 1) { 3532 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3533 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3534 } else { 3535 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3536 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3537 } 3538 PetscFunctionReturn(0); 3539 } 3540 3541 #undef __FUNCT__ 3542 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3543 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3544 { 3545 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3546 3547 PetscFunctionBegin; 3548 *Ad = a->A; 3549 *Ao = a->B; 3550 *colmap = a->garray; 3551 PetscFunctionReturn(0); 3552 } 3553 3554 #undef __FUNCT__ 3555 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3556 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3557 { 3558 PetscErrorCode ierr; 3559 PetscInt i; 3560 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3561 3562 PetscFunctionBegin; 3563 if (coloring->ctype == IS_COLORING_GLOBAL) { 3564 ISColoringValue *allcolors,*colors; 3565 ISColoring ocoloring; 3566 3567 /* set coloring for diagonal portion */ 3568 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3569 3570 /* set coloring for off-diagonal portion */ 3571 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3572 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3573 for (i=0; i<a->B->cmap.n; i++) { 3574 colors[i] = allcolors[a->garray[i]]; 3575 } 3576 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3577 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3578 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3579 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3580 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3581 ISColoringValue *colors; 3582 PetscInt *larray; 3583 ISColoring ocoloring; 3584 3585 /* set coloring for diagonal portion */ 3586 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3587 for (i=0; i<a->A->cmap.n; i++) { 3588 larray[i] = i + A->cmap.rstart; 3589 } 3590 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3591 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3592 for (i=0; i<a->A->cmap.n; i++) { 3593 colors[i] = coloring->colors[larray[i]]; 3594 } 3595 ierr = PetscFree(larray);CHKERRQ(ierr); 3596 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3597 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3598 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3599 3600 /* set coloring for off-diagonal portion */ 3601 ierr = 
PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3602 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3603 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3604 for (i=0; i<a->B->cmap.n; i++) { 3605 colors[i] = coloring->colors[larray[i]]; 3606 } 3607 ierr = PetscFree(larray);CHKERRQ(ierr); 3608 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3609 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3610 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3611 } else { 3612 SETERRQ1(PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype); 3613 } 3614 3615 PetscFunctionReturn(0); 3616 } 3617 3618 #if defined(PETSC_HAVE_ADIC) 3619 #undef __FUNCT__ 3620 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3621 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3622 { 3623 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3624 PetscErrorCode ierr; 3625 3626 PetscFunctionBegin; 3627 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3628 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3629 PetscFunctionReturn(0); 3630 } 3631 #endif 3632 3633 #undef __FUNCT__ 3634 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3635 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3636 { 3637 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3638 PetscErrorCode ierr; 3639 3640 PetscFunctionBegin; 3641 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3642 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3643 PetscFunctionReturn(0); 3644 } 3645 3646 #undef __FUNCT__ 3647 #define __FUNCT__ "MatMerge" 3648 /*@ 3649 MatMerge - Creates a single large parallel PETSc matrix by concatenating the sequential 3650 matrices from each processor 3651 3652 Collective on MPI_Comm 3653 3654 Input Parameters: 3655 + comm - the communicator the parallel matrix will live on 3656 . inmat - the input sequential matrix (one per processor) 3657 . n - number of local columns (or PETSC_DECIDE) 3658 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3659 3660 Output Parameter: 3661 . outmat - the parallel matrix generated 3662 3663 Level: advanced 3664 3665 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
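   A usage sketch (the matrix names are illustrative): each processor passes its own sequential matrix and receives the assembled parallel matrix. Note that the input matrix is destroyed inside this routine, as the implementation below shows.

.vb
     Mat localA;    /* SeqAIJ matrix built on this processor */
     Mat globalA;

     ierr = MatMerge(PETSC_COMM_WORLD,localA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&globalA);CHKERRQ(ierr);
     /* localA has been destroyed by MatMerge(); only globalA is used from here on */
.ve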
3666 3667 @*/ 3668 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3669 { 3670 PetscErrorCode ierr; 3671 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3672 PetscInt *indx; 3673 PetscScalar *values; 3674 3675 PetscFunctionBegin; 3676 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3677 if (scall == MAT_INITIAL_MATRIX){ 3678 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3679 if (n == PETSC_DECIDE){ 3680 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3681 } 3682 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3683 rstart -= m; 3684 3685 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3686 for (i=0;i<m;i++) { 3687 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3688 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3689 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3690 } 3691 /* This routine will ONLY return MPIAIJ type matrix */ 3692 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3693 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3694 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3695 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3696 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3697 3698 } else if (scall == MAT_REUSE_MATRIX){ 3699 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3700 } else { 3701 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3702 } 3703 3704 for (i=0;i<m;i++) { 3705 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3706 Ii = i + rstart; 3707 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3708 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3709 } 3710 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3711 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3712 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3713 3714 PetscFunctionReturn(0); 3715 } 3716 3717 #undef __FUNCT__ 3718 #define __FUNCT__ "MatFileSplit" 3719 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3720 { 3721 PetscErrorCode ierr; 3722 PetscMPIInt rank; 3723 PetscInt m,N,i,rstart,nnz; 3724 size_t len; 3725 const PetscInt *indx; 3726 PetscViewer out; 3727 char *name; 3728 Mat B; 3729 const PetscScalar *values; 3730 3731 PetscFunctionBegin; 3732 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3733 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3734 /* Should this be the type of the diagonal block of A? 
*/ 3735 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3736 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3737 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3738 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3739 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3740 for (i=0;i<m;i++) { 3741 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3742 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3743 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3744 } 3745 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3746 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3747 3748 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 3749 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3750 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3751 sprintf(name,"%s.%d",outfile,rank); 3752 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3753 ierr = PetscFree(name); 3754 ierr = MatView(B,out);CHKERRQ(ierr); 3755 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3756 ierr = MatDestroy(B);CHKERRQ(ierr); 3757 PetscFunctionReturn(0); 3758 } 3759 3760 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3761 #undef __FUNCT__ 3762 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3763 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3764 { 3765 PetscErrorCode ierr; 3766 Mat_Merge_SeqsToMPI *merge; 3767 PetscContainer container; 3768 3769 PetscFunctionBegin; 3770 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3771 if (container) { 3772 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3773 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3774 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3775 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3776 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3777 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3778 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3779 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3780 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3781 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3782 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3783 ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr); 3784 3785 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 3786 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3787 } 3788 ierr = PetscFree(merge);CHKERRQ(ierr); 3789 3790 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3791 PetscFunctionReturn(0); 3792 } 3793 3794 #include "src/mat/utils/freespace.h" 3795 #include "petscbt.h" 3796 3797 #undef __FUNCT__ 3798 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 3799 /*@C 3800 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 3801 matrices from each processor 3802 3803 Collective on MPI_Comm 3804 3805 Input Parameters: 3806 + comm - the communicators the parallel matrix will live on 3807 . seqmat - the input sequential matrices 3808 . m - number of local rows (or PETSC_DECIDE) 3809 . n - number of local columns (or PETSC_DECIDE) 3810 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3811 3812 Output Parameter: 3813 . mpimat - the parallel matrix generated 3814 3815 Level: advanced 3816 3817 Notes: 3818 The dimensions of the sequential matrix in each processor MUST be the same. 3819 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 3820 destroyed when mpimat is destroyed. 
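   As a sketch of the intended reuse pattern (names are illustrative; m and n are the local sizes, or PETSC_DECIDE): the symbolic setup is done once with MAT_INITIAL_MATRIX, and later calls with MAT_REUSE_MATRIX only redo the numerical merge.

.vb
     Mat seqA;     /* sequential contribution computed on this processor */
     Mat mpiA;

     ierr = MatMerge_SeqsToMPI(comm,seqA,m,n,MAT_INITIAL_MATRIX,&mpiA);CHKERRQ(ierr);
     /* ... update the numerical values of seqA without changing its nonzero pattern ... */
     ierr = MatMerge_SeqsToMPI(comm,seqA,m,n,MAT_REUSE_MATRIX,&mpiA);CHKERRQ(ierr);
.ve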
Call PetscObjectQuery() to access seqmat. 3821 @*/ 3822 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 3823 { 3824 PetscErrorCode ierr; 3825 MPI_Comm comm=((PetscObject)mpimat)->comm; 3826 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3827 PetscMPIInt size,rank,taga,*len_s; 3828 PetscInt N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j; 3829 PetscInt proc,m; 3830 PetscInt **buf_ri,**buf_rj; 3831 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3832 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3833 MPI_Request *s_waits,*r_waits; 3834 MPI_Status *status; 3835 MatScalar *aa=a->a; 3836 MatScalar **abuf_r,*ba_i; 3837 Mat_Merge_SeqsToMPI *merge; 3838 PetscContainer container; 3839 3840 PetscFunctionBegin; 3841 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3842 3843 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3844 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3845 3846 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3847 if (container) { 3848 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3849 } 3850 bi = merge->bi; 3851 bj = merge->bj; 3852 buf_ri = merge->buf_ri; 3853 buf_rj = merge->buf_rj; 3854 3855 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3856 owners = merge->rowmap.range; 3857 len_s = merge->len_s; 3858 3859 /* send and recv matrix values */ 3860 /*-----------------------------*/ 3861 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3862 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3863 3864 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 3865 for (proc=0,k=0; proc<size; proc++){ 3866 if (!len_s[proc]) continue; 3867 i = owners[proc]; 3868 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 3869 k++; 3870 } 3871 3872 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 3873 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 3874 ierr = PetscFree(status);CHKERRQ(ierr); 3875 3876 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3877 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3878 3879 /* insert mat values of mpimat */ 3880 /*----------------------------*/ 3881 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 3882 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3883 nextrow = buf_ri_k + merge->nrecv; 3884 nextai = nextrow + merge->nrecv; 3885 3886 for (k=0; k<merge->nrecv; k++){ 3887 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3888 nrows = *(buf_ri_k[k]); 3889 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 3890 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3891 } 3892 3893 /* set values of ba */ 3894 m = merge->rowmap.n; 3895 for (i=0; i<m; i++) { 3896 arow = owners[rank] + i; 3897 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 3898 bnzi = bi[i+1] - bi[i]; 3899 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 3900 3901 /* add local non-zero vals of this proc's seqmat into ba */ 3902 anzi = ai[arow+1] - ai[arow]; 3903 aj = a->j + ai[arow]; 3904 aa = a->a + ai[arow]; 3905 nextaj = 0; 3906 for (j=0; nextaj<anzi; j++){ 3907 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3908 ba_i[j] += aa[nextaj++]; 3909 } 3910 } 3911 3912 /* add 
received vals into ba */ 3913 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3914 /* i-th row */ 3915 if (i == *nextrow[k]) { 3916 anzi = *(nextai[k]+1) - *nextai[k]; 3917 aj = buf_rj[k] + *(nextai[k]); 3918 aa = abuf_r[k] + *(nextai[k]); 3919 nextaj = 0; 3920 for (j=0; nextaj<anzi; j++){ 3921 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3922 ba_i[j] += aa[nextaj++]; 3923 } 3924 } 3925 nextrow[k]++; nextai[k]++; 3926 } 3927 } 3928 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3929 } 3930 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3931 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3932 3933 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3934 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3935 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3936 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3937 PetscFunctionReturn(0); 3938 } 3939 3940 #undef __FUNCT__ 3941 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 3942 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3943 { 3944 PetscErrorCode ierr; 3945 Mat B_mpi; 3946 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3947 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3948 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3949 PetscInt M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j; 3950 PetscInt len,proc,*dnz,*onz; 3951 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3952 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3953 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3954 MPI_Status *status; 3955 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3956 PetscBT lnkbt; 3957 Mat_Merge_SeqsToMPI *merge; 3958 PetscContainer container; 3959 3960 PetscFunctionBegin; 3961 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3962 3963 /* make sure it is a PETSc comm */ 3964 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 3965 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3966 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3967 3968 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3969 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3970 3971 /* determine row ownership */ 3972 /*---------------------------------------------------------*/ 3973 ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 3974 merge->rowmap.n = m; 3975 merge->rowmap.N = M; 3976 merge->rowmap.bs = 1; 3977 ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr); 3978 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3979 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3980 3981 m = merge->rowmap.n; 3982 M = merge->rowmap.N; 3983 owners = merge->rowmap.range; 3984 3985 /* determine the number of messages to send, their lengths */ 3986 /*---------------------------------------------------------*/ 3987 len_s = merge->len_s; 3988 3989 len = 0; /* length of buf_si[] */ 3990 merge->nsend = 0; 3991 for (proc=0; proc<size; proc++){ 3992 len_si[proc] = 0; 3993 if (proc == rank){ 3994 len_s[proc] = 0; 3995 } else { 3996 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3997 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3998 } 3999 if (len_s[proc]) { 4000 merge->nsend++; 4001 nrows = 0; 4002 for (i=owners[proc]; i<owners[proc+1]; i++){ 4003 if (ai[i+1] > ai[i]) nrows++; 4004 } 4005 len_si[proc] = 2*(nrows+1); 4006 len += 
len_si[proc]; 4007 } 4008 } 4009 4010 /* determine the number and length of messages to receive for ij-structure */ 4011 /*-------------------------------------------------------------------------*/ 4012 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4013 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4014 4015 /* post the Irecv of j-structure */ 4016 /*-------------------------------*/ 4017 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4018 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4019 4020 /* post the Isend of j-structure */ 4021 /*--------------------------------*/ 4022 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 4023 sj_waits = si_waits + merge->nsend; 4024 4025 for (proc=0, k=0; proc<size; proc++){ 4026 if (!len_s[proc]) continue; 4027 i = owners[proc]; 4028 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4029 k++; 4030 } 4031 4032 /* receives and sends of j-structure are complete */ 4033 /*------------------------------------------------*/ 4034 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4035 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4036 4037 /* send and recv i-structure */ 4038 /*---------------------------*/ 4039 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4040 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4041 4042 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4043 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4044 for (proc=0,k=0; proc<size; proc++){ 4045 if (!len_s[proc]) continue; 4046 /* form outgoing message for i-structure: 4047 buf_si[0]: nrows to be sent 4048 [1:nrows]: row index (global) 4049 [nrows+1:2*nrows+1]: i-structure index 4050 */ 4051 /*-------------------------------------------*/ 4052 nrows = len_si[proc]/2 - 1; 4053 buf_si_i = buf_si + nrows+1; 4054 buf_si[0] = nrows; 4055 buf_si_i[0] = 0; 4056 nrows = 0; 4057 for (i=owners[proc]; i<owners[proc+1]; i++){ 4058 anzi = ai[i+1] - ai[i]; 4059 if (anzi) { 4060 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4061 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4062 nrows++; 4063 } 4064 } 4065 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4066 k++; 4067 buf_si += len_si[proc]; 4068 } 4069 4070 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4071 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4072 4073 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4074 for (i=0; i<merge->nrecv; i++){ 4075 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4076 } 4077 4078 ierr = PetscFree(len_si);CHKERRQ(ierr); 4079 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4080 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4081 ierr = PetscFree(si_waits);CHKERRQ(ierr); 4082 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4083 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4084 ierr = PetscFree(status);CHKERRQ(ierr); 4085 4086 /* compute a local seq matrix in each processor */ 4087 /*----------------------------------------------*/ 4088 /* allocate bi array and 
free space for accumulating nonzero column info */ 4089 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4090 bi[0] = 0; 4091 4092 /* create and initialize a linked list */ 4093 nlnk = N+1; 4094 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4095 4096 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4097 len = 0; 4098 len = ai[owners[rank+1]] - ai[owners[rank]]; 4099 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4100 current_space = free_space; 4101 4102 /* determine symbolic info for each local row */ 4103 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 4104 nextrow = buf_ri_k + merge->nrecv; 4105 nextai = nextrow + merge->nrecv; 4106 for (k=0; k<merge->nrecv; k++){ 4107 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4108 nrows = *buf_ri_k[k]; 4109 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4110 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4111 } 4112 4113 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4114 len = 0; 4115 for (i=0;i<m;i++) { 4116 bnzi = 0; 4117 /* add local non-zero cols of this proc's seqmat into lnk */ 4118 arow = owners[rank] + i; 4119 anzi = ai[arow+1] - ai[arow]; 4120 aj = a->j + ai[arow]; 4121 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4122 bnzi += nlnk; 4123 /* add received col data into lnk */ 4124 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4125 if (i == *nextrow[k]) { /* i-th row */ 4126 anzi = *(nextai[k]+1) - *nextai[k]; 4127 aj = buf_rj[k] + *nextai[k]; 4128 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4129 bnzi += nlnk; 4130 nextrow[k]++; nextai[k]++; 4131 } 4132 } 4133 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4134 4135 /* if free space is not available, make more free space */ 4136 if (current_space->local_remaining<bnzi) { 4137 ierr = PetscFreeSpaceGet(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4138 nspacedouble++; 4139 } 4140 /* copy data into free space, then initialize lnk */ 4141 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4142 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4143 4144 current_space->array += bnzi; 4145 current_space->local_used += bnzi; 4146 current_space->local_remaining -= bnzi; 4147 4148 bi[i+1] = bi[i] + bnzi; 4149 } 4150 4151 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 4152 4153 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4154 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4155 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4156 4157 /* create symbolic parallel matrix B_mpi */ 4158 /*---------------------------------------*/ 4159 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4160 if (n==PETSC_DECIDE) { 4161 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4162 } else { 4163 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4164 } 4165 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4166 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4167 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4168 4169 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4170 B_mpi->assembled = PETSC_FALSE; 4171 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4172 merge->bi = bi; 4173 merge->bj = bj; 4174 merge->buf_ri = buf_ri; 4175 merge->buf_rj = buf_rj; 
4176 merge->coi = PETSC_NULL; 4177 merge->coj = PETSC_NULL; 4178 merge->owners_co = PETSC_NULL; 4179 4180 /* attach the supporting struct to B_mpi for reuse */ 4181 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4182 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4183 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4184 *mpimat = B_mpi; 4185 4186 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4187 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4188 PetscFunctionReturn(0); 4189 } 4190 4191 #undef __FUNCT__ 4192 #define __FUNCT__ "MatMerge_SeqsToMPI" 4193 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4194 { 4195 PetscErrorCode ierr; 4196 4197 PetscFunctionBegin; 4198 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4199 if (scall == MAT_INITIAL_MATRIX){ 4200 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4201 } 4202 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4203 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4204 PetscFunctionReturn(0); 4205 } 4206 4207 #undef __FUNCT__ 4208 #define __FUNCT__ "MatGetLocalMat" 4209 /*@ 4210 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4211 4212 Not Collective 4213 4214 Input Parameters: 4215 + A - the matrix 4216 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4217 4218 Output Parameter: 4219 . A_loc - the local sequential matrix generated 4220 4221 Level: developer 4222 4223 @*/ 4224 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4225 { 4226 PetscErrorCode ierr; 4227 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4228 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4229 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4230 MatScalar *aa=a->a,*ba=b->a,*cam; 4231 PetscScalar *ca; 4232 PetscInt am=A->rmap.n,i,j,k,cstart=A->cmap.rstart; 4233 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4234 4235 PetscFunctionBegin; 4236 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4237 if (scall == MAT_INITIAL_MATRIX){ 4238 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4239 ci[0] = 0; 4240 for (i=0; i<am; i++){ 4241 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4242 } 4243 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4244 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4245 k = 0; 4246 for (i=0; i<am; i++) { 4247 ncols_o = bi[i+1] - bi[i]; 4248 ncols_d = ai[i+1] - ai[i]; 4249 /* off-diagonal portion of A */ 4250 for (jo=0; jo<ncols_o; jo++) { 4251 col = cmap[*bj]; 4252 if (col >= cstart) break; 4253 cj[k] = col; bj++; 4254 ca[k++] = *ba++; 4255 } 4256 /* diagonal portion of A */ 4257 for (j=0; j<ncols_d; j++) { 4258 cj[k] = cstart + *aj++; 4259 ca[k++] = *aa++; 4260 } 4261 /* off-diagonal portion of A */ 4262 for (j=jo; j<ncols_o; j++) { 4263 cj[k] = cmap[*bj++]; 4264 ca[k++] = *ba++; 4265 } 4266 } 4267 /* put together the new matrix */ 4268 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4269 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4270 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 4271 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4272 mat->free_a = PETSC_TRUE; 4273 mat->free_ij = PETSC_TRUE; 4274 mat->nonew = 0; 4275 } else if (scall == MAT_REUSE_MATRIX){ 4276 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4277 ci = mat->i; cj = mat->j; cam = mat->a; 4278 for (i=0; i<am; i++) { 4279 /* off-diagonal portion of A */ 4280 ncols_o = bi[i+1] - bi[i]; 4281 for (jo=0; jo<ncols_o; jo++) { 4282 col = cmap[*bj]; 4283 if (col >= cstart) break; 4284 *cam++ = *ba++; bj++; 4285 } 4286 /* diagonal portion of A */ 4287 ncols_d = ai[i+1] - ai[i]; 4288 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4289 /* off-diagonal portion of A */ 4290 for (j=jo; j<ncols_o; j++) { 4291 *cam++ = *ba++; bj++; 4292 } 4293 } 4294 } else { 4295 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4296 } 4297 4298 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4299 PetscFunctionReturn(0); 4300 } 4301 4302 #undef __FUNCT__ 4303 #define __FUNCT__ "MatGetLocalMatCondensed" 4304 /*@C 4305 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4306 4307 Not Collective 4308 4309 Input Parameters: 4310 + A - the matrix 4311 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4312 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4313 4314 Output Parameter: 4315 . A_loc - the local sequential matrix generated 4316 4317 Level: developer 4318 4319 @*/ 4320 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4321 { 4322 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4323 PetscErrorCode ierr; 4324 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4325 IS isrowa,iscola; 4326 Mat *aloc; 4327 4328 PetscFunctionBegin; 4329 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4330 if (!row){ 4331 start = A->rmap.rstart; end = A->rmap.rend; 4332 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4333 } else { 4334 isrowa = *row; 4335 } 4336 if (!col){ 4337 start = A->cmap.rstart; 4338 cmap = a->garray; 4339 nzA = a->A->cmap.n; 4340 nzB = a->B->cmap.n; 4341 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4342 ncols = 0; 4343 for (i=0; i<nzB; i++) { 4344 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4345 else break; 4346 } 4347 imark = i; 4348 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4349 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4350 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4351 ierr = PetscFree(idx);CHKERRQ(ierr); 4352 } else { 4353 iscola = *col; 4354 } 4355 if (scall != MAT_INITIAL_MATRIX){ 4356 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4357 aloc[0] = *A_loc; 4358 } 4359 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4360 *A_loc = aloc[0]; 4361 ierr = PetscFree(aloc);CHKERRQ(ierr); 4362 if (!row){ 4363 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4364 } 4365 if (!col){ 4366 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4367 } 4368 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4369 PetscFunctionReturn(0); 4370 } 4371 4372 #undef __FUNCT__ 4373 #define __FUNCT__ "MatGetBrowsOfAcols" 4374 /*@C 4375 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4376 4377 Collective on Mat 4378 4379 Input Parameters: 4380 + A,B - the matrices in mpiaij format 4381 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4382 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4383 4384 Output Parameter: 4385 + rowb, colb - index sets of rows and columns of B to extract 4386 . brstart - row index of B_seq from which next B->rmap.n rows are taken from B's local rows 4387 - B_seq - the sequential matrix generated 4388 4389 Level: developer 4390 4391 @*/ 4392 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4393 { 4394 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4395 PetscErrorCode ierr; 4396 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4397 IS isrowb,iscolb; 4398 Mat *bseq; 4399 4400 PetscFunctionBegin; 4401 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 4402 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 4403 } 4404 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4405 4406 if (scall == MAT_INITIAL_MATRIX){ 4407 start = A->cmap.rstart; 4408 cmap = a->garray; 4409 nzA = a->A->cmap.n; 4410 nzB = a->B->cmap.n; 4411 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4412 ncols = 0; 4413 for (i=0; i<nzB; i++) { /* row < local row index */ 4414 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4415 else break; 4416 } 4417 imark = i; 4418 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4419 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4420 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4421 ierr = PetscFree(idx);CHKERRQ(ierr); 4422 *brstart = imark; 4423 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr); 4424 } else { 4425 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4426 isrowb = *rowb; iscolb = *colb; 4427 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4428 bseq[0] = *B_seq; 4429 } 4430 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4431 *B_seq = bseq[0]; 4432 ierr = PetscFree(bseq);CHKERRQ(ierr); 4433 if (!rowb){ 4434 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4435 } else { 4436 *rowb = isrowb; 4437 } 4438 if (!colb){ 4439 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4440 } else { 4441 *colb = iscolb; 4442 } 4443 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4444 PetscFunctionReturn(0); 4445 } 4446 4447 #undef __FUNCT__ 4448 #define __FUNCT__ "MatGetBrowsOfAoCols" 4449 /*@C 4450 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4451 of the OFF-DIAGONAL portion of local A 4452 4453 Collective on Mat 4454 4455 Input Parameters: 4456 + A,B - the matrices in mpiaij format 4457 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4458 . 
startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4459 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4460 4461 Output Parameter: 4462 + B_oth - the sequential matrix generated 4463 4464 Level: developer 4465 4466 @*/ 4467 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth) 4468 { 4469 VecScatter_MPI_General *gen_to,*gen_from; 4470 PetscErrorCode ierr; 4471 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4472 Mat_SeqAIJ *b_oth; 4473 VecScatter ctx=a->Mvctx; 4474 MPI_Comm comm=((PetscObject)ctx)->comm; 4475 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4476 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj; 4477 PetscScalar *rvalues,*svalues; 4478 MatScalar *b_otha,*bufa,*bufA; 4479 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4480 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4481 MPI_Status *sstatus,rstatus; 4482 PetscMPIInt jj; 4483 PetscInt *cols,sbs,rbs; 4484 PetscScalar *vals; 4485 4486 PetscFunctionBegin; 4487 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 4488 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 4489 } 4490 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4491 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4492 4493 gen_to = (VecScatter_MPI_General*)ctx->todata; 4494 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4495 rvalues = gen_from->values; /* holds the length of receiving row */ 4496 svalues = gen_to->values; /* holds the length of sending row */ 4497 nrecvs = gen_from->n; 4498 nsends = gen_to->n; 4499 4500 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4501 srow = gen_to->indices; /* local row index to be sent */ 4502 sstarts = gen_to->starts; 4503 sprocs = gen_to->procs; 4504 sstatus = gen_to->sstatus; 4505 sbs = gen_to->bs; 4506 rstarts = gen_from->starts; 4507 rprocs = gen_from->procs; 4508 rbs = gen_from->bs; 4509 4510 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4511 if (scall == MAT_INITIAL_MATRIX){ 4512 /* i-array */ 4513 /*---------*/ 4514 /* post receives */ 4515 for (i=0; i<nrecvs; i++){ 4516 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4517 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4518 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4519 } 4520 4521 /* pack the outgoing message */ 4522 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 4523 rstartsj = sstartsj + nsends +1; 4524 sstartsj[0] = 0; rstartsj[0] = 0; 4525 len = 0; /* total length of j or a array to be sent */ 4526 k = 0; 4527 for (i=0; i<nsends; i++){ 4528 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4529 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4530 for (j=0; j<nrows; j++) { 4531 row = srow[k] + B->rmap.range[rank]; /* global row idx */ 4532 for (l=0; l<sbs; l++){ 4533 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4534 rowlen[j*sbs+l] = ncols; 4535 len += ncols; 4536 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4537 } 4538 k++; 4539 } 4540 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4541 sstartsj[i+1] = len; 
/* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4542 } 4543 /* recvs and sends of i-array are completed */ 4544 i = nrecvs; 4545 while (i--) { 4546 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4547 } 4548 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4549 4550 /* allocate buffers for sending j and a arrays */ 4551 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4552 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4553 4554 /* create i-array of B_oth */ 4555 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4556 b_othi[0] = 0; 4557 len = 0; /* total length of j or a array to be received */ 4558 k = 0; 4559 for (i=0; i<nrecvs; i++){ 4560 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4561 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 4562 for (j=0; j<nrows; j++) { 4563 b_othi[k+1] = b_othi[k] + rowlen[j]; 4564 len += rowlen[j]; k++; 4565 } 4566 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4567 } 4568 4569 /* allocate space for j and a arrrays of B_oth */ 4570 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4571 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 4572 4573 /* j-array */ 4574 /*---------*/ 4575 /* post receives of j-array */ 4576 for (i=0; i<nrecvs; i++){ 4577 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4578 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4579 } 4580 4581 /* pack the outgoing message j-array */ 4582 k = 0; 4583 for (i=0; i<nsends; i++){ 4584 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4585 bufJ = bufj+sstartsj[i]; 4586 for (j=0; j<nrows; j++) { 4587 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 4588 for (ll=0; ll<sbs; ll++){ 4589 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4590 for (l=0; l<ncols; l++){ 4591 *bufJ++ = cols[l]; 4592 } 4593 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4594 } 4595 } 4596 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4597 } 4598 4599 /* recvs and sends of j-array are completed */ 4600 i = nrecvs; 4601 while (i--) { 4602 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4603 } 4604 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4605 } else if (scall == MAT_REUSE_MATRIX){ 4606 sstartsj = *startsj; 4607 rstartsj = sstartsj + nsends +1; 4608 bufa = *bufa_ptr; 4609 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4610 b_otha = b_oth->a; 4611 } else { 4612 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 4613 } 4614 4615 /* a-array */ 4616 /*---------*/ 4617 /* post receives of a-array */ 4618 for (i=0; i<nrecvs; i++){ 4619 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4620 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4621 } 4622 4623 /* pack the outgoing message a-array */ 4624 k = 0; 4625 for (i=0; i<nsends; i++){ 4626 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4627 bufA = bufa+sstartsj[i]; 4628 for (j=0; j<nrows; j++) { 4629 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 4630 for (ll=0; ll<sbs; ll++){ 4631 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4632 for (l=0; l<ncols; l++){ 4633 *bufA++ = 

#undef __FUNCT__
#define __FUNCT__ "MatGetCommunicationStructs"
/*@C
   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

   Not Collective

   Input Parameter:
.  A - The matrix in mpiaij format

   Output Parameters:
+  lvec - The local vector holding off-process values from the argument to a matrix-vector product
.  colmap - A map from global column index to local index into lvec
-  multScatter - A scatter from the argument of a matrix-vector product to lvec

   Level: developer

@*/
#if defined (PETSC_USE_CTABLE)
PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
  PetscValidPointer(lvec, 2)
  PetscValidPointer(colmap, 3)
  PetscValidPointer(multScatter, 4)
  a = (Mat_MPIAIJ *) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}
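
/*
   A hedged usage sketch (illustrative only, not taken from the library): querying the structures
   that MatMult() uses on an assembled MPIAIJ matrix. The type of colmap depends on the
   PETSC_USE_CTABLE configure option, which is why the example declares it under the same guard.

     Vec        lvec;
     VecScatter Mvctx;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif

     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/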

EXTERN_C_BEGIN
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*);
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*);
EXTERN_C_END

#include "src/mat/impls/dense/mpi/mpidense.h"

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
/*
    Computes C = A*B as (B'*A')', since computing the dense*AIJ product A*B directly is untenable;
    the two transposed factors are multiplied and the result is transposed back.

            n        p          p
          (   )    (   )      (   )
       m  ( A )  * n( B )  = m( C )
          (   )    (   )      (   )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(At);CHKERRQ(ierr);
  ierr = MatDestroy(Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap.n,n=B->cmap.n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap.n != B->rmap.n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap.n %d != B->rmap.n %d\n",A->cmap.n,B->rmap.n);
  ierr = MatCreate(A->hdr.comm,&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C = Cmat;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
.  -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJ()
M*/
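
/*
   A hedged sketch (illustrative only, not taken from the library) of the usual way an MPIAIJ
   matrix is obtained; comm, m, n and the preallocation numbers below are placeholders.

     Mat A;

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);

   or, equivalently, MatSetFromOptions(A) together with the runtime option -mat_type mpiaij.
*/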

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);

  ierr         = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data      = (void*)b;
  ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->factor    = 0;
  B->rmap.bs   = 1;
  B->assembled = PETSC_FALSE;
  B->mapping   = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIAIJ",
                                     MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIAIJ",
                                     MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIAIJ",
                                     MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                     "MatIsTranspose_MPIAIJ",
                                     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                     "MatMPIAIJSetPreallocation_MPIAIJ",
                                     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
                                     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
                                     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                     "MatDiagonalScaleLocal_MPIAIJ",
                                     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
                                     "MatConvert_MPIAIJ_MPICSRPERM",
                                     MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
                                     "MatConvert_MPIAIJ_MPICRL",
                                     MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
                                     "MatMatMult_MPIDense_MPIAIJ",
                                     MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
                                     "MatMatMultSymbolic_MPIDense_MPIAIJ",
                                     MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
                                     "MatMatMultNumeric_MPIDense_MPIAIJ",
                                     MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
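
/*
   A hedged sketch (illustrative only, not taken from the library) of how the methods composed
   above are retrieved and invoked; this composed-function mechanism is what the generic
   MatMPIAIJSetPreallocation() wrapper relies on to reach the type-specific routine.

     PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]) = PETSC_NULL;

     ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
     if (f) { ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); }
*/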

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@
   MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
       and "off-diagonal" part of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - number of local columns. This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices for "diagonal" portion of matrix
.  j - column indices for "diagonal" portion of matrix
.  a - matrix values for "diagonal" portion of matrix
.  oi - row indices for "off-diagonal" portion of matrix
.  oj - column indices for "off-diagonal" portion of matrix
-  oa - matrix values for "off-diagonal" portion of matrix

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.

       The i and j indices are 0 based.

       See MatCreateMPIAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
                                                                 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
  if (i[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (oi[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  }
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;
  maij->donotstash     = PETSC_TRUE;
  (*mat)->preallocated = PETSC_TRUE;

  (*mat)->rmap.bs = (*mat)->cmap.bs = 1;
  ierr = PetscMapSetUp(&(*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscMapSetUp(&(*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
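
/*
   A hedged example (illustrative only, not taken from the library): the 2x2 matrix
        [  2  -1 ]
        [ -1   2 ]
   distributed over two processes, one row each. As the calls above show, the diagonal block is a
   SeqAIJ with n local columns (so its j indices are local), while the off-diagonal block is
   created with N columns (so its oj indices are global).

     PetscInt    i[]   = {0,1},  j[]   = {0};   PetscScalar a[]  = {2.0};
     PetscInt    oi[]  = {0,1};
     PetscInt    oj0[] = {1},    oj1[] = {0};   PetscScalar oa[] = {-1.0};
     Mat         A;

     on rank 0:  ierr = MatCreateMPIAIJWithSplitArrays(comm,1,1,2,2,i,j,a,oi,oj0,oa,&A);CHKERRQ(ierr);
     on rank 1:  ierr = MatCreateMPIAIJWithSplitArrays(comm,1,1,2,2,i,j,a,oi,oj1,oa,&A);CHKERRQ(ierr);
*/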

/*
    Special version of MatSetValues() for MPIAIJ matrices that can be called directly from Fortran,
    bypassing the usual Fortran interface overhead.
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
#undef SETERRQ2
#define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
#undef SETERRQ
#define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatPreallocated(mat);
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  }
#endif
  {
    PetscInt   i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
    PetscInt   cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
    PetscTruth roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat        A = aij->A;
    Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa = a->a;
    PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
    Mat        B = aij->B;
    Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
    MatScalar  *ba = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          if (in[j] >= cstart && in[j] < cend){
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined (PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap.n;
                ba    = b->a;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          }
        }
      } else {
        if (!aij->donotstash) {
          if (roworiented) {
            if (ignorezeroentries && v[i*n] == 0.0) continue;
            ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
          } else {
            if (ignorezeroentries && v[i] == 0.0) continue;
            ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
EXTERN_C_END
