#define PETSCMAT_DLL

#include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "src/inline/spops.h"

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscTruth     aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap.n) {
      i  = 0;
      nz = ld[i];                  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];  ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap.n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                    ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap.n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
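/*
   A minimal usage sketch (illustrative only; gseq, dist and m are hypothetical
   names, not part of this file).  gseq is a MATSEQAIJ matrix significant only
   on process 0, and m is this process's share of the rows; all processes must
   call collectively:

      Mat      gseq,dist;
      PetscInt m;
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,m,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,m,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);

   The second call reuses the distributed nonzero structure and moves only the
   numerical values from process 0.
*/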
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each
  processor has an order-N integer array, but access is fast).
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap.n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
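/*
   Note on the two macros above: each inserts one (col,value) pair into a
   single compressed row.  The search first narrows [low,high) by bisection
   while the gap exceeds 5 entries, then scans linearly; an existing column is
   added to or overwritten, and a new column is inserted by shifting the tail
   of the row up one slot, with MatSeqXAIJReallocateAIJ() growing the storage
   when the row is full.  The lastcol test exploits the common case of column
   indices arriving in increasing order within a single MatSetValues() call.
*/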
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
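/*
   Note on MatSetValuesRow_MPIAIJ() above: v is assumed to contain the entire
   row in global column order, i.e. the off-diagonal (B) entries lying to the
   left of the diagonal block, then the diagonal-block (A) entries, then the
   remaining B entries.  The loop over b->j locates the end of the left part
   (garray[] maps local off-diagonal column numbers back to global indices),
   and the three PetscMemcpy() calls split v accordingly.
*/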
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
  PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat            A = aij->A;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar      *aa = a->a;
  PetscTruth     ignorezeroentries = a->ignorezeroentries;
  Mat            B = aij->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
  MatScalar      *ba = b->a;

  PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt       nonew = a->nonew;
  MatScalar      *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap.n;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
  PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
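/*
   The next two routines implement the standard PETSc two-phase assembly for
   the entries that MatSetValues_MPIAIJ() stashed because they belong to rows
   owned by another process: Begin() agrees on the insert mode across the
   communicator and starts the stash scatter; End() drains the incoming
   messages, batching consecutive entries of the same row into a single
   MatSetValues_MPIAIJ() call, and then assembles the A (diagonal) and B
   (off-diagonal) blocks.
*/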
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ*)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
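/*
   MatZeroRows_MPIAIJ() below routes each requested global row to its owner
   with a counts-and-rendezvous pattern: each process tallies how many of its
   requests go to each owner (nprocs[]), PetscMaxSum() tells every process how
   many messages to expect and the maximum message length, receives are posted
   with MPI_ANY_SOURCE, and the owners convert received global indices to
   local ones (subtracting base) before zeroing.
*/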
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap.range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);
  /* actually zap the local rows */
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

     Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap.n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
  }
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
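/*
   MatMult_MPIAIJ() and MatMultAdd_MPIAIJ() above compute

        y = A_d x_local + A_o x_ghost

   where A_d (stored in a->A) holds the locally owned columns and A_o (a->B)
   the remaining columns.  The VecScatter Mvctx gathers the needed off-process
   entries of x into a->lvec; starting the scatter before the local product
   A_d x and finishing it afterwards overlaps communication with computation.
*/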
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)Amat->data,*Bij;
  Mat            Adia = Aij->A,Bdia,Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*)Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec)  {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
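/*
   The binary format written by the routine below is, in order: a four-entry
   header (MAT_FILE_COOKIE, global rows, global columns, global nonzero count),
   the row lengths of all rows, the global column indices of all nonzeros, and
   finally the numerical values.  Process 0 writes its own data first and then
   receives and forwards each other process's data in rank order.
*/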
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->rmap.N;
    header[2] = mat->cmap.N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap.n;
    range = mat->rmap.range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap.n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap.n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs space as large as the largest process needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap.n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);
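  /*
     The value pass below must visit the nonzeros in exactly the same order as
     the column-index pass above: first the off-diagonal (B) entries lying to
     the left of the diagonal block, then the diagonal-block (A) entries, then
     the remaining B entries, so each row is written sorted by global column.
  */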
  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap.n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt**)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    if (mat->rmap.N > 1024) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead");

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap.rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap.rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}
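/*
   MatRelax_MPIAIJ() below performs only processor-local relaxation.  Each
   outer iteration refreshes the ghost values of xx, forms the modified
   right-hand side

        bb1 = bb - B x_ghost

   (by scaling lvec by -1 and using a multadd), and then runs the sequential
   relaxation kernel of the diagonal block on bb1.  A true parallel sweep
   across process boundaries is not supported.
*/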
#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1;

  PetscFunctionBegin;
  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       first,local_size,nrows,*rows;
  int            ntids;
  IS             crowp,growp,irowp,lrowp,lcolp,icolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
  }
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
  ierr = ISSetPermutation(growp);CHKERRQ(ierr);
  if (pcomm!=comm) {
    ierr = ISDestroy(crowp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  /* get the local target indices */
  ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISDestroy(irowp);CHKERRQ(ierr);
  /* the column permutation is so much easier;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&ntids);CHKERRQ(ierr);
  if (ntids==1) {
    lcolp = colp;
  } else {
    ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  if (ntids>1) {
    ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISDestroy(lcolp);CHKERRQ(ierr);
  }
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  /* clean up */
  ierr = ISDestroy(lrowp);CHKERRQ(ierr);
  ierr = ISDestroy(icolp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->rmap.N;
  info->columns_global    = (double)matin->cmap.N;
  info->rows_local        = (double)matin->rmap.n;
  info->columns_local     = (double)matin->cmap.N;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap.n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}
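/*
   MatNorm_MPIAIJ() below reduces per-process partial results: the Frobenius
   norm sums |a_ij|^2 over both local blocks and takes the square root after
   an MPI_SUM reduction; the 1-norm accumulates per-column absolute sums into
   a length-N work array (off-diagonal columns mapped through garray) followed
   by an MPI_SUM reduction and a maximum; the infinity norm takes an MPI_MAX
   over the local row sums.  The 2-norm is not supported.
*/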
sqrt(*norm); 1537 } else if (type == NORM_1) { /* max column norm */ 1538 PetscReal *tmp,*tmp2; 1539 PetscInt *jj,*garray = aij->garray; 1540 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1541 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1542 ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 1543 *norm = 0.0; 1544 v = amat->a; jj = amat->j; 1545 for (j=0; j<amat->nz; j++) { 1546 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1547 } 1548 v = bmat->a; jj = bmat->j; 1549 for (j=0; j<bmat->nz; j++) { 1550 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1551 } 1552 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1553 for (j=0; j<mat->cmap.N; j++) { 1554 if (tmp2[j] > *norm) *norm = tmp2[j]; 1555 } 1556 ierr = PetscFree(tmp);CHKERRQ(ierr); 1557 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1558 } else if (type == NORM_INFINITY) { /* max row norm */ 1559 PetscReal ntemp = 0.0; 1560 for (j=0; j<aij->A->rmap.n; j++) { 1561 v = amat->a + amat->i[j]; 1562 sum = 0.0; 1563 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1564 sum += PetscAbsScalar(*v); v++; 1565 } 1566 v = bmat->a + bmat->i[j]; 1567 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1568 sum += PetscAbsScalar(*v); v++; 1569 } 1570 if (sum > ntemp) ntemp = sum; 1571 } 1572 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1573 } else { 1574 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1575 } 1576 } 1577 PetscFunctionReturn(0); 1578 } 1579 1580 #undef __FUNCT__ 1581 #define __FUNCT__ "MatTranspose_MPIAIJ" 1582 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1583 { 1584 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1585 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1586 PetscErrorCode ierr; 1587 PetscInt M = A->rmap.N,N = A->cmap.N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,i,*d_nnz; 1588 PetscInt cstart=A->cmap.rstart,ncol; 1589 Mat B; 1590 MatScalar *array; 1591 1592 PetscFunctionBegin; 1593 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1594 1595 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1596 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1597 ma = A->rmap.n; na = A->cmap.n; mb = a->B->rmap.n; 1598 ai = Aloc->i; aj = Aloc->j; 1599 bi = Bloc->i; bj = Bloc->j; 1600 ierr = PetscMalloc((1+na+bi[mb])*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1601 cols = d_nnz + na + 1; /* work space to be used by B part */ 1602 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1603 for (i=0; i<ai[ma]; i++){ 1604 d_nnz[aj[i]] ++; 1605 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1606 } 1607 1608 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1609 ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr); 1610 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1611 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1612 } else { 1613 B = *matout; 1614 } 1615 1616 /* copy over the A part */ 1617 array = Aloc->a; 1618 row = A->rmap.rstart; 1619 for (i=0; i<ma; i++) { 1620 ncol = ai[i+1]-ai[i]; 1621 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1622 row++; array += ncol; aj += ncol; 1623 } 1624 aj = Aloc->j; 1625 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1626 1627 /* copy over the B part */ 1628 array = Bloc->a; 1629 row = A->rmap.rstart; 1630 for 
(i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1631 for (i=0; i<mb; i++) { 1632 ncol = bi[i+1]-bi[i]; 1633 ierr = MatSetValues(B,ncol,cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1634 row++; array += ncol; cols += ncol; 1635 } 1636 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1637 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1638 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1639 if (*matout != A) { 1640 *matout = B; 1641 } else { 1642 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1643 } 1644 PetscFunctionReturn(0); 1645 } 1646 1647 #undef __FUNCT__ 1648 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1649 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1650 { 1651 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1652 Mat a = aij->A,b = aij->B; 1653 PetscErrorCode ierr; 1654 PetscInt s1,s2,s3; 1655 1656 PetscFunctionBegin; 1657 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1658 if (rr) { 1659 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1660 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1661 /* Overlap communication with computation. */ 1662 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1663 } 1664 if (ll) { 1665 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1666 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1667 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1668 } 1669 /* scale the diagonal block */ 1670 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1671 1672 if (rr) { 1673 /* Do a scatter end and then right scale the off-diagonal block */ 1674 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1675 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1676 } 1677 1678 PetscFunctionReturn(0); 1679 } 1680 1681 #undef __FUNCT__ 1682 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1683 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1684 { 1685 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1686 PetscErrorCode ierr; 1687 1688 PetscFunctionBegin; 1689 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1690 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1691 PetscFunctionReturn(0); 1692 } 1693 #undef __FUNCT__ 1694 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1695 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1696 { 1697 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1698 PetscErrorCode ierr; 1699 1700 PetscFunctionBegin; 1701 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1702 PetscFunctionReturn(0); 1703 } 1704 1705 #undef __FUNCT__ 1706 #define __FUNCT__ "MatEqual_MPIAIJ" 1707 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1708 { 1709 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1710 Mat a,b,c,d; 1711 PetscTruth flg; 1712 PetscErrorCode ierr; 1713 1714 PetscFunctionBegin; 1715 a = matA->A; b = matA->B; 1716 c = matB->A; d = matB->B; 1717 1718 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1719 if (flg) { 1720 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1721 } 1722 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1723 PetscFunctionReturn(0); 1724 } 1725 1726 #undef __FUNCT__ 1727 #define __FUNCT__ "MatCopy_MPIAIJ" 1728 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1729 { 1730 PetscErrorCode ierr; 1731 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1732 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1733 1734 PetscFunctionBegin; 1735 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
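For example, a call such as MatCopy(X,Y,SAME_NONZERO_PATTERN) takes the fast block-wise path below only when Y comes from the same implementation (so X->ops->copy == Y->ops->copy); any mismatch falls back to the general MatCopy_Basic().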
*/ 1736 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1737 /* because of the column compression in the off-processor part of the matrix a->B, 1738 the number of columns in a->B and b->B may be different, hence we cannot call 1739 the MatCopy() directly on the two parts. If need be, we can provide a more 1740 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1741 then copying the submatrices */ 1742 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1743 } else { 1744 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1745 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1746 } 1747 PetscFunctionReturn(0); 1748 } 1749 1750 #undef __FUNCT__ 1751 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1752 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1753 { 1754 PetscErrorCode ierr; 1755 1756 PetscFunctionBegin; 1757 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1758 PetscFunctionReturn(0); 1759 } 1760 1761 #include "petscblaslapack.h" 1762 #undef __FUNCT__ 1763 #define __FUNCT__ "MatAXPY_MPIAIJ" 1764 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1765 { 1766 PetscErrorCode ierr; 1767 PetscInt i; 1768 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1769 PetscBLASInt bnz,one=1; 1770 Mat_SeqAIJ *x,*y; 1771 1772 PetscFunctionBegin; 1773 if (str == SAME_NONZERO_PATTERN) { 1774 PetscScalar alpha = a; 1775 x = (Mat_SeqAIJ *)xx->A->data; 1776 y = (Mat_SeqAIJ *)yy->A->data; 1777 bnz = PetscBLASIntCast(x->nz); 1778 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1779 x = (Mat_SeqAIJ *)xx->B->data; 1780 y = (Mat_SeqAIJ *)yy->B->data; 1781 bnz = PetscBLASIntCast(x->nz); 1782 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1783 } else if (str == SUBSET_NONZERO_PATTERN) { 1784 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1785 1786 x = (Mat_SeqAIJ *)xx->B->data; 1787 y = (Mat_SeqAIJ *)yy->B->data; 1788 if (y->xtoy && y->XtoY != xx->B) { 1789 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1790 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1791 } 1792 if (!y->xtoy) { /* get xtoy */ 1793 ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1794 y->XtoY = xx->B; 1795 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1796 } 1797 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1798 } else { 1799 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1800 } 1801 PetscFunctionReturn(0); 1802 } 1803 1804 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1805 1806 #undef __FUNCT__ 1807 #define __FUNCT__ "MatConjugate_MPIAIJ" 1808 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1809 { 1810 #if defined(PETSC_USE_COMPLEX) 1811 PetscErrorCode ierr; 1812 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1813 1814 PetscFunctionBegin; 1815 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1816 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1817 #else 1818 PetscFunctionBegin; 1819 #endif 1820 PetscFunctionReturn(0); 1821 } 1822 1823 #undef __FUNCT__ 1824 #define __FUNCT__ "MatRealPart_MPIAIJ" 1825 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1826 { 1827 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1828 PetscErrorCode ierr; 1829 1830 PetscFunctionBegin; 1831 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1832 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1833 PetscFunctionReturn(0); 1834 } 1835 1836 #undef __FUNCT__ 1837 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1838 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1839 { 1840 Mat_MPIAIJ 
*a = (Mat_MPIAIJ*)A->data; 1841 PetscErrorCode ierr; 1842 1843 PetscFunctionBegin; 1844 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1845 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1846 PetscFunctionReturn(0); 1847 } 1848 1849 #ifdef PETSC_HAVE_PBGL 1850 1851 #include <boost/parallel/mpi/bsp_process_group.hpp> 1852 #include <boost/graph/distributed/ilu_default_graph.hpp> 1853 #include <boost/graph/distributed/ilu_0_block.hpp> 1854 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1855 #include <boost/graph/distributed/petsc/interface.hpp> 1856 #include <boost/multi_array.hpp> 1857 #include <boost/parallel/distributed_property_map.hpp> 1858 1859 #undef __FUNCT__ 1860 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1861 /* 1862 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1863 */ 1864 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact) 1865 { 1866 namespace petsc = boost::distributed::petsc; 1867 1868 namespace graph_dist = boost::graph::distributed; 1869 using boost::graph::distributed::ilu_default::process_group_type; 1870 using boost::graph::ilu_permuted; 1871 1872 PetscTruth row_identity, col_identity; 1873 PetscContainer c; 1874 PetscInt m, n, M, N; 1875 PetscErrorCode ierr; 1876 1877 PetscFunctionBegin; 1878 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1879 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1880 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1881 if (!row_identity || !col_identity) { 1882 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1883 } 1884 1885 process_group_type pg; 1886 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1887 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1888 lgraph_type& level_graph = *lgraph_p; 1889 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1890 1891 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1892 ilu_permuted(level_graph); 1893 1894 /* put together the new matrix */ 1895 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 1896 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1897 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1898 ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr); 1899 ierr = MatSetType(*fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 1900 ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1901 ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1902 (*fact)->factor = FACTOR_LU; 1903 1904 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 1905 ierr = PetscContainerSetPointer(c, lgraph_p); 1906 ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c); 1907 PetscFunctionReturn(0); 1908 } 1909 1910 #undef __FUNCT__ 1911 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1912 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B) 1913 { 1914 PetscFunctionBegin; 1915 PetscFunctionReturn(0); 1916 } 1917 1918 #undef __FUNCT__ 1919 #define __FUNCT__ "MatSolve_MPIAIJ" 1920 /* 1921 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1922 */ 1923 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1924 { 1925 namespace graph_dist = boost::graph::distributed; 1926 1927 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1928 lgraph_type* lgraph_p; 1929 PetscContainer c; 1930 PetscErrorCode 
ierr; 1931 1932 PetscFunctionBegin; 1933 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1934 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1935 ierr = VecCopy(b, x);CHKERRQ(ierr); 1936 1937 PetscScalar* array_x; 1938 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1939 PetscInt sx; 1940 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 1941 1942 PetscScalar* array_b; 1943 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 1944 PetscInt sb; 1945 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 1946 1947 lgraph_type& level_graph = *lgraph_p; 1948 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1949 1950 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 1951 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 1952 ref_x(array_x, boost::extents[num_vertices(graph)]); 1953 1954 typedef boost::iterator_property_map<array_ref_type::iterator, 1955 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 1956 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 1957 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 1958 1959 ilu_set_solve(*lgraph_p, vector_b, vector_x); 1960 ierr = VecRestoreArray(x, &array_x);CHKERRQ(ierr); ierr = VecRestoreArray(b, &array_b);CHKERRQ(ierr); 1961 PetscFunctionReturn(0); 1962 } 1963 #endif 1964 1965 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 1966 PetscInt nzlocal,nsends,nrecvs; 1967 PetscMPIInt *send_rank; 1968 PetscInt *sbuf_nz,*sbuf_j,**rbuf_j; 1969 PetscScalar *sbuf_a,**rbuf_a; 1970 PetscErrorCode (*MatDestroy)(Mat); 1971 } Mat_Redundant; 1972 1973 #undef __FUNCT__ 1974 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 1975 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 1976 { 1977 PetscErrorCode ierr; 1978 Mat_Redundant *redund=(Mat_Redundant*)ptr; 1979 PetscInt i; 1980 1981 PetscFunctionBegin; 1982 ierr = PetscFree(redund->send_rank);CHKERRQ(ierr); 1983 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 1984 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 1985 for (i=0; i<redund->nrecvs; i++){ 1986 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 1987 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 1988 } 1989 ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 1990 ierr = PetscFree(redund);CHKERRQ(ierr); 1991 PetscFunctionReturn(0); 1992 } 1993 1994 #undef __FUNCT__ 1995 #define __FUNCT__ "MatDestroy_MatRedundant" 1996 PetscErrorCode MatDestroy_MatRedundant(Mat A) 1997 { 1998 PetscErrorCode ierr; 1999 PetscContainer container; 2000 Mat_Redundant *redund=PETSC_NULL; 2001 2002 PetscFunctionBegin; 2003 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2004 if (container) { 2005 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2006 } else { 2007 SETERRQ(PETSC_ERR_PLIB,"Container does not exist"); 2008 } 2009 A->ops->destroy = redund->MatDestroy; 2010 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2011 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2012 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 2013 PetscFunctionReturn(0); 2014 } 2015 2016 #undef __FUNCT__ 2017 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2018 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2019 { 2020 PetscMPIInt rank,size; 2021 MPI_Comm comm=((PetscObject)mat)->comm; 2022 PetscErrorCode ierr; 2023 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2024 PetscMPIInt
*send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2025 PetscInt *rowrange=mat->rmap.range; 2026 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2027 Mat A=aij->A,B=aij->B,C=PETSC_NULL; /* C is set from *matredundant only when reuse is requested */ 2028 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2029 PetscScalar *sbuf_a; 2030 PetscInt nzlocal=a->nz+b->nz; 2031 PetscInt j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2032 PetscInt rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N; 2033 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2034 MatScalar *aworkA,*aworkB; 2035 PetscScalar *vals; 2036 PetscMPIInt tag1,tag2,tag3,imdex; 2037 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2038 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2039 MPI_Status recv_status,*send_status; 2040 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2041 PetscInt **rbuf_j=PETSC_NULL; 2042 PetscScalar **rbuf_a=PETSC_NULL; 2043 Mat_Redundant *redund=PETSC_NULL; 2044 PetscContainer container; 2045 2046 PetscFunctionBegin; 2047 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2048 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2049 2050 if (reuse == MAT_REUSE_MATRIX) { C = *matredundant; 2051 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2052 if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2053 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2054 if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2055 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2056 if (container) { 2057 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2058 } else { 2059 SETERRQ(PETSC_ERR_PLIB,"Container does not exist"); 2060 } 2061 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix.
Wrong nzlocal"); 2062 2063 nsends = redund->nsends; 2064 nrecvs = redund->nrecvs; 2065 send_rank = redund->send_rank; recv_rank = send_rank + size; 2066 sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends; 2067 sbuf_j = redund->sbuf_j; 2068 sbuf_a = redund->sbuf_a; 2069 rbuf_j = redund->rbuf_j; 2070 rbuf_a = redund->rbuf_a; 2071 } 2072 2073 if (reuse == MAT_INITIAL_MATRIX){ 2074 PetscMPIInt subrank,subsize; 2075 PetscInt nleftover,np_subcomm; 2076 /* get the destination processors' id send_rank, nsends and nrecvs */ 2077 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2078 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2079 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 2080 recv_rank = send_rank + size; 2081 np_subcomm = size/nsubcomm; 2082 nleftover = size - nsubcomm*np_subcomm; 2083 nsends = 0; nrecvs = 0; 2084 for (i=0; i<size; i++){ /* i=rank*/ 2085 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2086 send_rank[nsends] = i; nsends++; 2087 recv_rank[nrecvs++] = i; 2088 } 2089 } 2090 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2091 i = size-nleftover-1; 2092 j = 0; 2093 while (j < nsubcomm - nleftover){ 2094 send_rank[nsends++] = i; 2095 i--; j++; 2096 } 2097 } 2098 2099 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2100 for (i=0; i<nleftover; i++){ 2101 recv_rank[nrecvs++] = size-nleftover+i; 2102 } 2103 } 2104 2105 /* allocate sbuf_j, sbuf_a */ 2106 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2107 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2108 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2109 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2110 2111 /* copy mat's local entries into the buffers */ 2112 if (reuse == MAT_INITIAL_MATRIX){ 2113 rownz_max = 0; 2114 rptr = sbuf_j; 2115 cols = sbuf_j + rend-rstart + 1; 2116 vals = sbuf_a; 2117 rptr[0] = 0; 2118 for (i=0; i<rend-rstart; i++){ 2119 row = i + rstart; 2120 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2121 ncols = nzA + nzB; 2122 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2123 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2124 /* load the column indices for this row into cols */ 2125 lwrite = 0; 2126 for (l=0; l<nzB; l++) { 2127 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2128 vals[lwrite] = aworkB[l]; 2129 cols[lwrite++] = ctmp; 2130 } 2131 } 2132 for (l=0; l<nzA; l++){ 2133 vals[lwrite] = aworkA[l]; 2134 cols[lwrite++] = cstart + cworkA[l]; 2135 } 2136 for (l=0; l<nzB; l++) { 2137 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2138 vals[lwrite] = aworkB[l]; 2139 cols[lwrite++] = ctmp; 2140 } 2141 } 2142 vals += ncols; 2143 cols += ncols; 2144 rptr[i+1] = rptr[i] + ncols; 2145 if (rownz_max < ncols) rownz_max = ncols; 2146 } 2147 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2148 } else { /* only copy matrix values into sbuf_a */ 2149 rptr = sbuf_j; 2150 vals = sbuf_a; 2151 rptr[0] = 0; 2152 for (i=0; i<rend-rstart; i++){ 2153 row = i + rstart; 2154 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2155 ncols = nzA + nzB; 2156 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2157 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2158 lwrite = 0; 2159 for (l=0; l<nzB; l++) { 2160 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2161 } 2162 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2163 for (l=0; l<nzB; l++) { 
2164 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2165 } 2166 vals += ncols; 2167 rptr[i+1] = rptr[i] + ncols; 2168 } 2169 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2170 2171 /* send nzlocal to others, and recv other's nzlocal */ 2172 /*--------------------------------------------------*/ 2173 if (reuse == MAT_INITIAL_MATRIX){ 2174 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2175 s_waits2 = s_waits3 + nsends; 2176 s_waits1 = s_waits2 + nsends; 2177 r_waits1 = s_waits1 + nsends; 2178 r_waits2 = r_waits1 + nrecvs; 2179 r_waits3 = r_waits2 + nrecvs; 2180 } else { 2181 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2182 r_waits3 = s_waits3 + nsends; 2183 } 2184 2185 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2186 if (reuse == MAT_INITIAL_MATRIX){ 2187 /* get new tags to keep the communication clean */ 2188 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2189 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2190 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2191 rbuf_nz = sbuf_nz + nsends; 2192 2193 /* post receives of other's nzlocal */ 2194 for (i=0; i<nrecvs; i++){ 2195 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2196 } 2197 /* send nzlocal to others */ 2198 for (i=0; i<nsends; i++){ 2199 sbuf_nz[i] = nzlocal; 2200 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2201 } 2202 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2203 count = nrecvs; 2204 while (count) { 2205 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2206 recv_rank[imdex] = recv_status.MPI_SOURCE; 2207 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2208 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2209 2210 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2211 rbuf_nz[imdex] += i + 2; 2212 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2213 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2214 count--; 2215 } 2216 /* wait on sends of nzlocal */ 2217 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2218 /* send mat->i,j to others, and recv from other's */ 2219 /*------------------------------------------------*/ 2220 for (i=0; i<nsends; i++){ 2221 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2222 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2223 } 2224 /* wait on receives of mat->i,j */ 2225 /*------------------------------*/ 2226 count = nrecvs; 2227 while (count) { 2228 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2229 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2230 count--; 2231 } 2232 /* wait on sends of mat->i,j */ 2233 /*---------------------------*/ 2234 if (nsends) { 2235 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2236 } 2237 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2238 2239 /* post receives, send and receive mat->a */ 2240 /*----------------------------------------*/ 
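/* Note that only the numerical values (tag3) are exchanged on every call; the structural messages above (tag1 for the nonzero counts, tag2 for the row pointers and column indices) are sent only in the MAT_INITIAL_MATRIX case, since the reuse path recovers them from the attached Mat_Redundant context. */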
2241 for (imdex=0; imdex<nrecvs; imdex++) { 2242 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2243 } 2244 for (i=0; i<nsends; i++){ 2245 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2246 } 2247 count = nrecvs; 2248 while (count) { 2249 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2250 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2251 count--; 2252 } 2253 if (nsends) { 2254 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2255 } 2256 2257 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2258 2259 /* create redundant matrix */ 2260 /*-------------------------*/ 2261 if (reuse == MAT_INITIAL_MATRIX){ 2262 /* compute rownz_max for preallocation */ 2263 for (imdex=0; imdex<nrecvs; imdex++){ 2264 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2265 rptr = rbuf_j[imdex]; 2266 for (i=0; i<j; i++){ 2267 ncols = rptr[i+1] - rptr[i]; 2268 if (rownz_max < ncols) rownz_max = ncols; 2269 } 2270 } 2271 2272 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2273 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2274 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2275 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2276 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2277 } else { 2278 C = *matredundant; 2279 } 2280 2281 /* insert local matrix entries */ 2282 rptr = sbuf_j; 2283 cols = sbuf_j + rend-rstart + 1; 2284 vals = sbuf_a; 2285 for (i=0; i<rend-rstart; i++){ 2286 row = i + rstart; 2287 ncols = rptr[i+1] - rptr[i]; 2288 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2289 vals += ncols; 2290 cols += ncols; 2291 } 2292 /* insert received matrix entries */ 2293 for (imdex=0; imdex<nrecvs; imdex++){ 2294 rstart = rowrange[recv_rank[imdex]]; 2295 rend = rowrange[recv_rank[imdex]+1]; 2296 rptr = rbuf_j[imdex]; 2297 cols = rbuf_j[imdex] + rend-rstart + 1; 2298 vals = rbuf_a[imdex]; 2299 for (i=0; i<rend-rstart; i++){ 2300 row = i + rstart; 2301 ncols = rptr[i+1] - rptr[i]; 2302 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2303 vals += ncols; 2304 cols += ncols; 2305 } 2306 } 2307 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2308 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2309 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2310 if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N); 2311 if (reuse == MAT_INITIAL_MATRIX){ 2312 PetscContainer container; 2313 *matredundant = C; 2314 /* create a supporting struct and attach it to C for reuse */ 2315 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2316 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2317 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2318 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2319 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2320 2321 redund->nzlocal = nzlocal; 2322 redund->nsends = nsends; 2323 redund->nrecvs = nrecvs; 2324 redund->send_rank = send_rank; 2325 redund->sbuf_nz = sbuf_nz; 2326 redund->sbuf_j = sbuf_j; 2327 redund->sbuf_a = sbuf_a; 2328 redund->rbuf_j = 
rbuf_j; 2329 redund->rbuf_a = rbuf_a; 2330 2331 redund->MatDestroy = C->ops->destroy; 2332 C->ops->destroy = MatDestroy_MatRedundant; 2333 } 2334 PetscFunctionReturn(0); 2335 } 2336 2337 #undef __FUNCT__ 2338 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2339 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2340 { 2341 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2342 PetscInt n = A->rmap.n; 2343 PetscInt cstart = A->cmap.rstart; 2344 PetscInt *cmap = mat->garray; 2345 PetscInt *diagIdx, *offdiagIdx; 2346 Vec diagV, offdiagV; 2347 PetscScalar *a, *diagA, *offdiagA; 2348 PetscInt r; 2349 PetscErrorCode ierr; 2350 2351 PetscFunctionBegin; 2352 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2353 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2354 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2355 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2356 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2357 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2358 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2359 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2360 for(r = 0; r < n; ++r) { 2361 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2362 a[r] = diagA[r]; 2363 idx[r] = cstart + diagIdx[r]; 2364 } else { 2365 a[r] = offdiagA[r]; 2366 idx[r] = cmap[offdiagIdx[r]]; 2367 } 2368 } 2369 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2370 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2371 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2372 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2373 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2374 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2375 PetscFunctionReturn(0); 2376 } 2377 2378 #undef __FUNCT__ 2379 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2380 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[]) 2381 { 2382 PetscErrorCode ierr; 2383 2384 PetscFunctionBegin; 2385 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 2386 PetscFunctionReturn(0); 2387 } 2388 2389 /* -------------------------------------------------------------------*/ 2390 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2391 MatGetRow_MPIAIJ, 2392 MatRestoreRow_MPIAIJ, 2393 MatMult_MPIAIJ, 2394 /* 4*/ MatMultAdd_MPIAIJ, 2395 MatMultTranspose_MPIAIJ, 2396 MatMultTransposeAdd_MPIAIJ, 2397 #ifdef PETSC_HAVE_PBGL 2398 MatSolve_MPIAIJ, 2399 #else 2400 0, 2401 #endif 2402 0, 2403 0, 2404 /*10*/ 0, 2405 0, 2406 0, 2407 MatRelax_MPIAIJ, 2408 MatTranspose_MPIAIJ, 2409 /*15*/ MatGetInfo_MPIAIJ, 2410 MatEqual_MPIAIJ, 2411 MatGetDiagonal_MPIAIJ, 2412 MatDiagonalScale_MPIAIJ, 2413 MatNorm_MPIAIJ, 2414 /*20*/ MatAssemblyBegin_MPIAIJ, 2415 MatAssemblyEnd_MPIAIJ, 2416 0, 2417 MatSetOption_MPIAIJ, 2418 MatZeroEntries_MPIAIJ, 2419 /*25*/ MatZeroRows_MPIAIJ, 2420 0, 2421 #ifdef PETSC_HAVE_PBGL 2422 MatLUFactorNumeric_MPIAIJ, 2423 #else 2424 0, 2425 #endif 2426 0, 2427 0, 2428 /*30*/ MatSetUpPreallocation_MPIAIJ, 2429 #ifdef PETSC_HAVE_PBGL 2430 MatILUFactorSymbolic_MPIAIJ, 2431 #else 2432 0, 2433 #endif 2434 0, 2435 0, 2436 0, 2437 /*35*/ MatDuplicate_MPIAIJ, 2438 0, 2439 0, 2440 0, 2441 0, 2442 /*40*/ MatAXPY_MPIAIJ, 2443 MatGetSubMatrices_MPIAIJ, 2444 MatIncreaseOverlap_MPIAIJ, 2445 MatGetValues_MPIAIJ, 2446 MatCopy_MPIAIJ, 2447 /*45*/ 0, 2448 MatScale_MPIAIJ, 2449 0, 2450 0, 2451 0, 2452 /*50*/ MatSetBlockSize_MPIAIJ, 2453 0, 2454 0, 2455 0, 2456 0, 2457 /*55*/ 
MatFDColoringCreate_MPIAIJ, 2458 0, 2459 MatSetUnfactored_MPIAIJ, 2460 MatPermute_MPIAIJ, 2461 0, 2462 /*60*/ MatGetSubMatrix_MPIAIJ, 2463 MatDestroy_MPIAIJ, 2464 MatView_MPIAIJ, 2465 0, 2466 0, 2467 /*65*/ 0, 2468 0, 2469 0, 2470 0, 2471 0, 2472 /*70*/ 0, 2473 0, 2474 MatSetColoring_MPIAIJ, 2475 #if defined(PETSC_HAVE_ADIC) 2476 MatSetValuesAdic_MPIAIJ, 2477 #else 2478 0, 2479 #endif 2480 MatSetValuesAdifor_MPIAIJ, 2481 /*75*/ 0, 2482 0, 2483 0, 2484 0, 2485 0, 2486 /*80*/ 0, 2487 0, 2488 0, 2489 0, 2490 /*84*/ MatLoad_MPIAIJ, 2491 0, 2492 0, 2493 0, 2494 0, 2495 0, 2496 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 2497 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2498 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2499 MatPtAP_Basic, 2500 MatPtAPSymbolic_MPIAIJ, 2501 /*95*/ MatPtAPNumeric_MPIAIJ, 2502 0, 2503 0, 2504 0, 2505 0, 2506 /*100*/0, 2507 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2508 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2509 MatConjugate_MPIAIJ, 2510 0, 2511 /*105*/MatSetValuesRow_MPIAIJ, 2512 MatRealPart_MPIAIJ, 2513 MatImaginaryPart_MPIAIJ, 2514 0, 2515 0, 2516 /*110*/0, 2517 MatGetRedundantMatrix_MPIAIJ, 2518 MatGetRowMin_MPIAIJ, 2519 0, 2520 0, 2521 /*115*/MatGetSeqNonzerostructure_MPIAIJ}; 2522 2523 /* ----------------------------------------------------------------------------------------*/ 2524 2525 EXTERN_C_BEGIN 2526 #undef __FUNCT__ 2527 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2528 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2529 { 2530 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2531 PetscErrorCode ierr; 2532 2533 PetscFunctionBegin; 2534 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2535 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2536 PetscFunctionReturn(0); 2537 } 2538 EXTERN_C_END 2539 2540 EXTERN_C_BEGIN 2541 #undef __FUNCT__ 2542 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2543 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2544 { 2545 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2546 PetscErrorCode ierr; 2547 2548 PetscFunctionBegin; 2549 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2550 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2551 PetscFunctionReturn(0); 2552 } 2553 EXTERN_C_END 2554 2555 #include "petscpc.h" 2556 EXTERN_C_BEGIN 2557 #undef __FUNCT__ 2558 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2559 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2560 { 2561 Mat_MPIAIJ *b; 2562 PetscErrorCode ierr; 2563 PetscInt i; 2564 2565 PetscFunctionBegin; 2566 B->preallocated = PETSC_TRUE; 2567 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2568 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2569 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2570 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2571 2572 B->rmap.bs = B->cmap.bs = 1; 2573 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2574 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2575 if (d_nnz) { 2576 for (i=0; i<B->rmap.n; i++) { 2577 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2578 } 2579 } 2580 if (o_nnz) { 2581 for (i=0; i<B->rmap.n; i++) { 2582 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2583 } 2584 } 2585 b = (Mat_MPIAIJ*)B->data; 2586 2587 /* Explicitly create 2 MATSEQAIJ matrices. 
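b->A holds the square "diagonal" block (local rows by local columns) and b->B holds the off-diagonal part; b->B is created here with the full global column count N, and its columns are compressed to just the nonzero ones during assembly (see the column compression remark in MatCopy_MPIAIJ()).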
*/ 2588 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2589 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 2590 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2591 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2592 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2593 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 2594 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2595 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2596 2597 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2598 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2599 2600 PetscFunctionReturn(0); 2601 } 2602 EXTERN_C_END 2603 2604 #undef __FUNCT__ 2605 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2606 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2607 { 2608 Mat mat; 2609 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2610 PetscErrorCode ierr; 2611 2612 PetscFunctionBegin; 2613 *newmat = 0; 2614 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2615 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 2616 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2617 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2618 a = (Mat_MPIAIJ*)mat->data; 2619 2620 mat->factor = matin->factor; 2621 mat->rmap.bs = matin->rmap.bs; 2622 mat->assembled = PETSC_TRUE; 2623 mat->insertmode = NOT_SET_VALUES; 2624 mat->preallocated = PETSC_TRUE; 2625 2626 a->size = oldmat->size; 2627 a->rank = oldmat->rank; 2628 a->donotstash = oldmat->donotstash; 2629 a->roworiented = oldmat->roworiented; 2630 a->rowindices = 0; 2631 a->rowvalues = 0; 2632 a->getrowactive = PETSC_FALSE; 2633 2634 ierr = PetscMapCopy(((PetscObject)mat)->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 2635 ierr = PetscMapCopy(((PetscObject)mat)->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 2636 2637 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr); 2638 if (oldmat->colmap) { 2639 #if defined (PETSC_USE_CTABLE) 2640 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2641 #else 2642 ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2643 ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2644 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2645 #endif 2646 } else a->colmap = 0; 2647 if (oldmat->garray) { 2648 PetscInt len; 2649 len = oldmat->B->cmap.n; 2650 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2651 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2652 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2653 } else a->garray = 0; 2654 2655 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2656 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2657 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2658 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2659 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2660 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2661 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2662 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2663 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2664 *newmat = mat; 2665 PetscFunctionReturn(0); 2666 } 
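/*
   MatLoad_MPIAIJ below reads a matrix from a PETSc binary viewer: process 0 reads the header and the
   row lengths, computes per-process nonzero counts, and ships each process its row lengths, column
   indices, and numerical values; every process then preallocates from its row lengths and inserts its
   rows with MatSetValues_MPIAIJ() before the final assembly.
*/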
2667 2668 #include "petscsys.h" 2669 2670 #undef __FUNCT__ 2671 #define __FUNCT__ "MatLoad_MPIAIJ" 2672 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat) 2673 { 2674 Mat A; 2675 PetscScalar *vals,*svals; 2676 MPI_Comm comm = ((PetscObject)viewer)->comm; 2677 MPI_Status status; 2678 PetscErrorCode ierr; 2679 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 2680 PetscInt i,nz,j,rstart,rend,mmax; 2681 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2682 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2683 PetscInt cend,cstart,n,*rowners; 2684 int fd; 2685 2686 PetscFunctionBegin; 2687 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2688 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2689 if (!rank) { 2690 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2691 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2692 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2693 } 2694 2695 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2696 M = header[1]; N = header[2]; 2697 /* determine ownership of all rows */ 2698 m = M/size + ((M % size) > rank); 2699 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2700 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2701 2702 /* First process needs enough room for process with most rows */ 2703 if (!rank) { 2704 mmax = rowners[1]; 2705 for (i=2; i<=size; i++) { 2706 mmax = PetscMax(mmax,rowners[i]); 2707 } 2708 } else mmax = m; 2709 2710 rowners[0] = 0; 2711 for (i=2; i<=size; i++) { 2712 rowners[i] += rowners[i-1]; 2713 } 2714 rstart = rowners[rank]; 2715 rend = rowners[rank+1]; 2716 2717 /* distribute row lengths to all processors */ 2718 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2719 if (!rank) { 2720 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2721 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2722 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2723 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2724 for (j=0; j<m; j++) { 2725 procsnz[0] += ourlens[j]; 2726 } 2727 for (i=1; i<size; i++) { 2728 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2729 /* calculate the number of nonzeros on each processor */ 2730 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2731 procsnz[i] += rowlengths[j]; 2732 } 2733 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2734 } 2735 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2736 } else { 2737 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2738 } 2739 2740 if (!rank) { 2741 /* determine max buffer needed and allocate it */ 2742 maxnz = 0; 2743 for (i=0; i<size; i++) { 2744 maxnz = PetscMax(maxnz,procsnz[i]); 2745 } 2746 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2747 2748 /* read in my part of the matrix column indices */ 2749 nz = procsnz[0]; 2750 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2751 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2752 2753 /* read in everyone else's parts and ship them off */ 2754 for (i=1; i<size; i++) { 2755 nz = procsnz[i]; 2756 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2757 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2758 } 2759 ierr = PetscFree(cols);CHKERRQ(ierr); 2760 } else { 2761 /*
determine buffer space needed for message */ 2762 nz = 0; 2763 for (i=0; i<m; i++) { 2764 nz += ourlens[i]; 2765 } 2766 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2767 2768 /* receive message of column indices*/ 2769 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2770 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2771 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2772 } 2773 2774 /* determine column ownership if matrix is not square */ 2775 if (N != M) { 2776 n = N/size + ((N % size) > rank); 2777 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2778 cstart = cend - n; 2779 } else { 2780 cstart = rstart; 2781 cend = rend; 2782 n = cend - cstart; 2783 } 2784 2785 /* loop over local rows, determining number of off diagonal entries */ 2786 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2787 jj = 0; 2788 for (i=0; i<m; i++) { 2789 for (j=0; j<ourlens[i]; j++) { 2790 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2791 jj++; 2792 } 2793 } 2794 2795 /* create our matrix */ 2796 for (i=0; i<m; i++) { 2797 ourlens[i] -= offlens[i]; 2798 } 2799 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2800 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2801 ierr = MatSetType(A,type);CHKERRQ(ierr); 2802 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2803 2804 for (i=0; i<m; i++) { 2805 ourlens[i] += offlens[i]; 2806 } 2807 2808 if (!rank) { 2809 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2810 2811 /* read in my part of the matrix numerical values */ 2812 nz = procsnz[0]; 2813 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2814 2815 /* insert into matrix */ 2816 jj = rstart; 2817 smycols = mycols; 2818 svals = vals; 2819 for (i=0; i<m; i++) { 2820 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2821 smycols += ourlens[i]; 2822 svals += ourlens[i]; 2823 jj++; 2824 } 2825 2826 /* read in other processors and ship out */ 2827 for (i=1; i<size; i++) { 2828 nz = procsnz[i]; 2829 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2830 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 2831 } 2832 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2833 } else { 2834 /* receive numeric values */ 2835 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2836 2837 /* receive message of values*/ 2838 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 2839 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2840 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2841 2842 /* insert into matrix */ 2843 jj = rstart; 2844 smycols = mycols; 2845 svals = vals; 2846 for (i=0; i<m; i++) { 2847 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2848 smycols += ourlens[i]; 2849 svals += ourlens[i]; 2850 jj++; 2851 } 2852 } 2853 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2854 ierr = PetscFree(vals);CHKERRQ(ierr); 2855 ierr = PetscFree(mycols);CHKERRQ(ierr); 2856 ierr = PetscFree(rowners);CHKERRQ(ierr); 2857 2858 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2859 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2860 *newmat = A; 2861 PetscFunctionReturn(0); 2862 } 2863 2864 #undef __FUNCT__ 2865 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2866 /* 2867 Not great since it makes two copies of the 
submatrix: first a sequential AIJ submatrix on each process, 2868 and then the final parallel matrix by concatenating the local pieces. 2869 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2870 */ 2871 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2872 { 2873 PetscErrorCode ierr; 2874 PetscMPIInt rank,size; 2875 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2876 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2877 Mat *local,M,Mreuse; 2878 MatScalar *vwork,*aa; 2879 MPI_Comm comm = ((PetscObject)mat)->comm; 2880 Mat_SeqAIJ *aij; 2881 2882 2883 PetscFunctionBegin; 2884 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2885 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2886 2887 if (call == MAT_REUSE_MATRIX) { 2888 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2889 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2890 local = &Mreuse; 2891 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2892 } else { 2893 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2894 Mreuse = *local; 2895 ierr = PetscFree(local);CHKERRQ(ierr); 2896 } 2897 2898 /* 2899 m - number of local rows 2900 n - number of columns (same on all processors) 2901 rstart - first row in new global matrix generated 2902 */ 2903 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2904 if (call == MAT_INITIAL_MATRIX) { 2905 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2906 ii = aij->i; 2907 jj = aij->j; 2908 2909 /* 2910 Determine the number of non-zeros in the diagonal and off-diagonal 2911 portions of the matrix in order to do correct preallocation 2912 */ 2913 2914 /* first get start and end of "diagonal" columns */ 2915 if (csize == PETSC_DECIDE) { 2916 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2917 if (mglobal == n) { /* square matrix */ 2918 nlocal = m; 2919 } else { 2920 nlocal = n/size + ((n % size) > rank); 2921 } 2922 } else { 2923 nlocal = csize; 2924 } 2925 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2926 rstart = rend - nlocal; 2927 if (rank == size - 1 && rend != n) { 2928 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2929 } 2930 2931 /* next, compute all the lengths */ 2932 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2933 olens = dlens + m; 2934 for (i=0; i<m; i++) { 2935 jend = ii[i+1] - ii[i]; 2936 olen = 0; 2937 dlen = 0; 2938 for (j=0; j<jend; j++) { 2939 if (*jj < rstart || *jj >= rend) olen++; 2940 else dlen++; 2941 jj++; 2942 } 2943 olens[i] = olen; 2944 dlens[i] = dlen; 2945 } 2946 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2947 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 2948 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2949 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2950 ierr = PetscFree(dlens);CHKERRQ(ierr); 2951 } else { 2952 PetscInt ml,nl; 2953 2954 M = *newmat; 2955 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2956 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2957 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2958 /* 2959 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2960 rather than the slower MatSetValues().
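Flagging the matrix as previously assembled (was_assembled) while clearing assembled lets the insertions below reuse the existing parallel data structure instead of triggering a first-time assembly.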
2961 */ 2962 M->was_assembled = PETSC_TRUE; 2963 M->assembled = PETSC_FALSE; 2964 } 2965 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2966 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2967 ii = aij->i; 2968 jj = aij->j; 2969 aa = aij->a; 2970 for (i=0; i<m; i++) { 2971 row = rstart + i; 2972 nz = ii[i+1] - ii[i]; 2973 cwork = jj; jj += nz; 2974 vwork = aa; aa += nz; 2975 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2976 } 2977 2978 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2979 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2980 *newmat = M; 2981 2982 /* save submatrix used in processor for next request */ 2983 if (call == MAT_INITIAL_MATRIX) { 2984 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2985 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2986 } 2987 2988 PetscFunctionReturn(0); 2989 } 2990 2991 EXTERN_C_BEGIN 2992 #undef __FUNCT__ 2993 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2994 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 2995 { 2996 PetscInt m,cstart, cend,j,nnz,i,d; 2997 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 2998 const PetscInt *JJ; 2999 PetscScalar *values; 3000 PetscErrorCode ierr; 3001 3002 PetscFunctionBegin; 3003 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]); 3004 3005 B->rmap.bs = B->cmap.bs = 1; 3006 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 3007 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 3008 m = B->rmap.n; 3009 cstart = B->cmap.rstart; 3010 cend = B->cmap.rend; 3011 rstart = B->rmap.rstart; 3012 3013 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 3014 o_nnz = d_nnz + m; 3015 3016 #if defined(PETSC_USE_DEBUG) 3017 for (i=0; i<m; i++) { 3018 nnz = Ii[i+1]- Ii[i]; 3019 JJ = J + Ii[i]; 3020 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3021 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3022 if (nnz && (JJ[nnz-1] >= B->cmap.N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap.N); 3023 for (j=1; j<nnz; j++) { 3024 if (JJ[j] <= JJ[j-1]) SETERRQ2(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at %D location in column indices",i,j); 3025 } 3026 } 3027 #endif 3028 3029 for (i=0; i<m; i++) { 3030 nnz = Ii[i+1]- Ii[i]; 3031 JJ = J + Ii[i]; 3032 nnz_max = PetscMax(nnz_max,nnz); 3033 for (j=0; j<nnz; j++) { 3034 if (*JJ >= cstart) break; 3035 JJ++; 3036 } 3037 d = 0; 3038 for (; j<nnz; j++) { 3039 if (*JJ++ >= cend) break; 3040 d++; 3041 } 3042 d_nnz[i] = d; 3043 o_nnz[i] = nnz - d; 3044 } 3045 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3046 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 3047 3048 if (v) values = (PetscScalar*)v; 3049 else { 3050 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3051 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3052 } 3053 3054 for (i=0; i<m; i++) { 3055 ii = i + rstart; 3056 nnz = Ii[i+1]- Ii[i]; 3057 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3058 } 3059 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3060 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3061 3062 if (!v) { 3063 ierr = PetscFree(values);CHKERRQ(ierr); 3064 } 3065 PetscFunctionReturn(0); 3066 } 3067 EXTERN_C_END
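/*
   A minimal usage sketch for the routine above (an illustration only, not taken from the PETSc
   examples; 'comm' and 'rank' are assumed to be set up by the caller and error checking is
   omitted), using the two-process example from the manual page that follows:

     Mat         B;
     PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2};  PetscScalar v0[] = {1,2,3};
     PetscInt    i1[] = {0,3},   j1[] = {0,1,2};  PetscScalar v1[] = {4,5,6};

     MatCreate(comm,&B);
     MatSetSizes(B,!rank ? 2 : 1,PETSC_DECIDE,3,3);   (process 0 owns rows 0-1, process 1 owns row 2)
     MatSetType(B,MATMPIAIJ);
     if (!rank) MatMPIAIJSetPreallocationCSR(B,i0,j0,v0);
     else       MatMPIAIJSetPreallocationCSR(B,i1,j1,v1);

   The routine copies i, j, and v and assembles B, so B is ready for use immediately afterwards.
*/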
3068 3069 #undef __FUNCT__ 3070 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3071 /*@ 3072 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3073 (the default parallel PETSc format). 3074 3075 Collective on MPI_Comm 3076 3077 Input Parameters: 3078 + B - the matrix 3079 . i - the indices into j for the start of each local row (starts with zero) 3080 . j - the column indices for each local row (starts with zero); these must be sorted for each row 3081 - v - optional values in the matrix 3082 3083 Level: developer 3084 3085 Notes: 3086 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3087 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3088 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3089 3090 The i and j indices are 0 based, and the entries of i are offsets into the local j array. 3091 3092 The format used for the sparse matrix input is equivalent to a 3093 row-major ordering, i.e., for the following matrix, the input data expected is 3094 as shown: 3095 3096 1 0 0 3097 2 0 3 P0 3098 ------- 3099 4 5 6 P1 3100 3101 Process0 [P0]: rows_owned=[0,1] 3102 i = {0,1,3} [size = nrow+1 = 2+1] 3103 j = {0,0,2} [size = nz = 3] 3104 v = {1,2,3} [size = nz = 3] 3105 3106 Process1 [P1]: rows_owned=[2] 3107 i = {0,3} [size = nrow+1 = 1+1] 3108 j = {0,1,2} [size = nz = 3] 3109 v = {4,5,6} [size = nz = 3] 3110 3111 The column indices for each row MUST be sorted. 3112 3113 .keywords: matrix, aij, compressed row, sparse, parallel 3114 3115 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3116 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3117 @*/ 3118 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3119 { 3120 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 3121 3122 PetscFunctionBegin; 3123 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3124 if (f) { 3125 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 3126 } 3127 PetscFunctionReturn(0); 3128 } 3129 3130 #undef __FUNCT__ 3131 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3132 /*@C 3133 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3134 (the default parallel PETSc format). For good matrix assembly performance 3135 the user should preallocate the matrix storage by setting the parameters 3136 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3137 performance can be increased by more than a factor of 50. 3138 3139 Collective on MPI_Comm 3140 3141 Input Parameters: 3142 + B - the matrix 3143 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3144 (same value is used for all local rows) 3145 . d_nnz - array containing the number of nonzeros in the various rows of the 3146 DIAGONAL portion of the local submatrix (possibly different for each row) 3147 or PETSC_NULL, if d_nz is used to specify the nonzero structure.
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters 
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the 
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure. 
           The size of this array is equal to the number of local rows, i.e., 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero 
           structure. The size of this array is equal to the number 
           of local rows, i.e., 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)) is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.  See the users manual for details.

   The parallel matrix is partitioned such that the first m0 rows belong to 
   process 0, the next m1 rows belong to process 1, the next m2 rows belong 
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined 
   as the submatrix which is obtained by extracting the part corresponding to 
   the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 
   first row that belongs to the processor, and r2 is the last row belonging 
   to this processor. This is a square mxm matrix. The remaining portion of 
   the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
   You can also run with the option -info and look for messages with the string 
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is 
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store 
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified 
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatGetInfo()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
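/*
   A hedged usage sketch (illustrative only, not part of the library source):
   the d_nnz/o_nnz preallocation for proc0 of the 8x8 example in the man page
   above.  On a real 3-process run each rank would pass its own sizes and
   arrays; the helper name PreallocateExample is hypothetical.
*/
#if 0
static PetscErrorCode PreallocateExample(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;
  PetscInt       d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,3,3,8,8);CHKERRQ(ierr);  /* this rank (proc0) owns rows 0-2 */
  ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  /* ... MatSetValues() for the owned rows, then MatAssemblyBegin/End() ... */
  PetscFunctionReturn(0);
}
#endif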
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithArrays"
/*@
   MatCreateMPIAIJWithArrays - Creates an MPI AIJ matrix using arrays that contain
   the local rows in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the 
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and the i indices are offsets into the local j array.

   The format used for the sparse matrix input is equivalent to a
   row-major ordering, i.e., for the following matrix, the input data expected is
   as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
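/*
   An illustrative sketch (not part of the library source): the same 3x3
   example built in a single call with MatCreateMPIAIJWithArrays(), shown for
   the process that owns rows 0-1.  The helper and array names are hypothetical.
*/
#if 0
static PetscErrorCode CreateWithArraysExample(MPI_Comm comm,Mat *A)
{
  const PetscInt    i0[] = {0,1,3},j0[] = {0,0,2};
  const PetscScalar v0[] = {1.0,2.0,3.0};
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  /* m = 2 local rows, n = 2 local columns, global size 3x3 */
  ierr = MatCreateMPIAIJWithArrays(comm,2,2,3,3,i0,j0,v0,A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif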
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters 
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the 
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the 
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the 
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure. 
           The size of this array is equal to the number of local rows, i.e., 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero 
           structure. The size of this array is equal to the number 
           of local rows, i.e., 'm'.

   Output Parameter:
.  A - the matrix 

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly. This is definitely
   true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 
   processor then it must be used on all processors that share the object for 
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the 
   first m0 rows belong to process 0, the next m1 rows belong to 
   process 1, the next m2 rows belong to process 2, etc., where 
   m0,m1,m2,... are the input parameter 'm', i.e., each processor stores 
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging 
   to the 0th partition, the next n1 columns belonging to the next 
   partition, etc., where n0,n1,n2... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor 
   is the submatrix corresponding to the rows and columns m,n 
   corresponding to the given processor, i.e., the diagonal submatrix on 
   process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better 
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion 
   to be its local rows and the corresponding columns (a square submatrix);  
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix). 

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 
   type of communicator, use the construction mechanism:
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!


   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is 
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store 
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified 
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.
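
   As a usage sketch (the d_nnz/o_nnz values are those of proc0 in the table
   above; each process passes its own local sizes and arrays), proc0 of the
   example would call:

.vb
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve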
   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_GLOBAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap.n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    PetscInt        *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap.n; i++) {
      larray[i] = i + A->cmap.rstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap.n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap.n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype);
  }

  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ADIC)
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge"
/*@
      MatMerge - Creates a single large PETSc matrix by concatenating the sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    inmat - the input sequential matrix (one per process)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    outmat - the parallel matrix generated

    Level: advanced

   Notes: The number of columns of the matrix in EACH processor MUST be the same.
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    /* count nonzeros in each row, for diagonal and off-diagonal portion of matrix */
    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    ierr = MPI_Scan(&m,&rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= m;

    ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
    }
    /* This routine will ONLY return an MPIAIJ type matrix */
    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  } else if (scall == MAT_REUSE_MATRIX) {
    ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatDestroy(inmat);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
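/*
   A hedged usage sketch (illustrative only, not part of the library source):
   each process hands its own SeqAIJ piece to MatMerge(), which destroys the
   input and returns the concatenated parallel matrix.  The helper and
   variable names are hypothetical.
*/
#if 0
static PetscErrorCode MergeExample(MPI_Comm comm,Mat localmat,Mat *globalmat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* all local pieces must have the same number of columns;
     localmat is destroyed by MatMerge() */
  ierr = MatMerge(comm,localmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,globalmat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif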
#undef __FUNCT__
#define __FUNCT__ "MatFileSplit"
PetscErrorCode MatFileSplit(Mat A,char *outfile)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscInt          m,N,i,rstart,nnz;
  size_t            len;
  const PetscInt    *indx;
  PetscViewer       out;
  char              *name;
  Mat               B;
  const PetscScalar *values;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
  ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
  /* Should this be the type of the diagonal block of A? */
  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
    ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
  ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
  ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
  sprintf(name,"%s.%d",outfile,rank);
  ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
  ierr = MatView(B,out);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
  ierr = MatDestroy(B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
{
  PetscErrorCode      ierr;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
    ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->bi);CHKERRQ(ierr);
    ierr = PetscFree(merge->bj);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
    ierr = PetscFree(merge->coi);CHKERRQ(ierr);
    ierr = PetscFree(merge->coj);CHKERRQ(ierr);
    ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
    ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);

    ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
    /* free the merge struct only when it was actually retrieved above;
       freeing the uninitialized pointer otherwise would be an error */
    ierr = PetscFree(merge);CHKERRQ(ierr);
  }

  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include "src/mat/utils/freespace.h"
#include "petscbt.h"

#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPINumeric"
/*@C
      MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrices
.    m - number of local rows (or PETSC_DECIDE)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    mpimat - the parallel matrix generated

    Level: advanced

   Notes: 
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
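
     A typical calling sequence (a sketch, with illustrative variable names) is

.vb
     MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_INITIAL_MATRIX,&mpimat);  - symbolic and numeric phases
     ... change the numerical values of seqmat, keeping its nonzero pattern ...
     MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_REUSE_MATRIX,&mpimat);    - numeric phase only
.ve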
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode      ierr;
  MPI_Comm            comm=((PetscObject)mpimat)->comm;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  MatScalar           *aa=a->a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
  } else {
    /* the container is attached by MatMerge_SeqsToMPISymbolic(); without it merge
       would be dereferenced uninitialized below */
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"mpimat must be created by MatMerge_SeqsToMPISymbolic()");
  }
  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  owners = merge->rowmap.range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow = buf_ri_k + merge->nrecv;
  nextai  = nextrow + merge->nrecv;

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  /* set values of ba */
  m = merge->rowmap.n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
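/*
   An orientation note (a sketch inferred from the code below, not authoritative
   documentation): MatMerge_SeqsToMPISymbolic() ships the off-process rows of
   each local sequential matrix to their owning ranks in two integer pieces -
   a compressed i-structure (row count, local row numbers, row offsets) and the
   column indices (j); the matching values (a) travel later, in
   MatMerge_SeqsToMPINumeric() above.  Received patterns are merged with the
   locally owned rows through a linked list (PetscLLCreate()/PetscLLAdd()) to
   build the preallocation (dnz/onz) and the merged bi/bj structure cached in
   the "MatMergeSeqsToMPI" container.
*/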
#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode      ierr;
  Mat                 B_mpi;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt            len,proc,*dnz,*onz;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);

  /* make sure it is a PETSc comm */
  ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);

  /* determine row ownership */
  /*---------------------------------------------------------*/
  ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
  merge->rowmap.n  = m;
  merge->rowmap.N  = M;
  merge->rowmap.bs = 1;
  ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);

  m      = merge->rowmap.n;
  M      = merge->rowmap.N;
  owners = merge->rowmap.range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len          = 0;  /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++) {
    len_si[proc] = 0;
    if (proc == rank) {
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros in rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      for (i=owners[proc]; i<owners[proc+1]; i++) {
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*--------------------------------*/
  ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
  sj_waits = si_waits + merge->nsend;

  for (proc=0, k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr   = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s;  /* points to the beginning of the k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++) {
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc]; /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}

  ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++) {
    ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree(si_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr  = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow = buf_ri_k + merge->nrecv;
  nextai  = nextrow + merge->nrecv;
  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow  = owners[rank] + i;
    anzi  = ai[arow+1] - ai[arow];
    aj    = a->j + ai[arow];
    ierr  = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi  = *(nextai[k]+1) - *nextai[k];
        aj    = buf_rj[k] + *nextai[k];
        ierr  = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = PETSC_NULL;
  merge->coj          = PETSC_NULL;
  merge->owners_co    = PETSC_NULL;

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPI"
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
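/*
   A hedged sketch (illustrative only, not part of the library source) of the
   intended reuse pattern for the symbolic/numeric split above: the symbolic
   phase runs once, the numeric phase once per new set of values with an
   unchanged nonzero pattern.  The helper name is hypothetical.
*/
#if 0
static PetscErrorCode MergeReuseExample(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode ierr;
  PetscInt       it;

  PetscFunctionBegin;
  ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);  /* pattern, once */
  for (it=0; it<10; it++) {
    /* ... update the numerical values of seqmat, same nonzero pattern ... */
    ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);         /* values, each time */
  }
  PetscFunctionReturn(0);
}
#endif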
#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMat"
/*@
     MatGetLocalMat - Creates a SeqAIJ matrix by taking all of a parallel matrix's local rows

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
  PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
  MatScalar      *aa=a->a,*ba=b->a,*cam;
  PetscScalar    *ca;
  PetscInt       am=A->rmap.n,i,j,k,cstart=A->cmap.rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr  = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++) {
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
    k = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    ci = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMatCondensed"
/*@C
     MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all of a parallel matrix's local rows and NON-ZERO columns

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or PETSC_NULL)

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row) {
    start = A->rmap.rstart; end = A->rmap.rend;
    ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col) {
    start = A->cmap.rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap.n;
    nzB   = a->B->cmap.n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt),&idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr   = PetscFree(aloc);CHKERRQ(ierr);
  if (!row) {
    ierr = ISDestroy(isrowa);CHKERRQ(ierr);
  }
  if (!col) {
    ierr = ISDestroy(iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
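/*
   A minimal usage sketch (illustrative only, not part of the library source)
   for MatGetLocalMat() above; MatGetLocalMatCondensed() follows the same
   MAT_INITIAL_MATRIX / MAT_REUSE_MATRIX pattern.  The helper name is
   hypothetical.
*/
#if 0
static PetscErrorCode LocalMatExample(Mat A)   /* A: an assembled MPIAIJ matrix */
{
  PetscErrorCode ierr;
  Mat            A_loc;

  PetscFunctionBegin;
  ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
  /* ... A's values change while its nonzero pattern stays fixed ... */
  ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
  ierr = MatDestroy(A_loc);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif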
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)

   Output Parameters:
+    rowb, colb - index sets of rows and columns of B to extract
.    brstart - row index of B_seq from which the next B->rmap.n rows are taken from B's local rows
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq;

  PetscFunctionBegin;
  if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend) {
    SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    start = A->cmap.rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap.n;
    nzB   = a->B->cmap.n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt),&idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
    *brstart = imark;
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb) {
    ierr = ISDestroy(isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    ierr = ISDestroy(iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4455 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4456 4457 Output Parameter: 4458 + B_oth - the sequential matrix generated 4459 4460 Level: developer 4461 4462 @*/ 4463 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth) 4464 { 4465 VecScatter_MPI_General *gen_to,*gen_from; 4466 PetscErrorCode ierr; 4467 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4468 Mat_SeqAIJ *b_oth; 4469 VecScatter ctx=a->Mvctx; 4470 MPI_Comm comm=((PetscObject)ctx)->comm; 4471 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4472 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj; 4473 PetscScalar *rvalues,*svalues; 4474 MatScalar *b_otha,*bufa,*bufA; 4475 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4476 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4477 MPI_Status *sstatus,rstatus; 4478 PetscMPIInt jj; 4479 PetscInt *cols,sbs,rbs; 4480 PetscScalar *vals; 4481 4482 PetscFunctionBegin; 4483 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 4484 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 4485 } 4486 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4487 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4488 4489 gen_to = (VecScatter_MPI_General*)ctx->todata; 4490 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4491 rvalues = gen_from->values; /* holds the length of receiving row */ 4492 svalues = gen_to->values; /* holds the length of sending row */ 4493 nrecvs = gen_from->n; 4494 nsends = gen_to->n; 4495 4496 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4497 srow = gen_to->indices; /* local row index to be sent */ 4498 sstarts = gen_to->starts; 4499 sprocs = gen_to->procs; 4500 sstatus = gen_to->sstatus; 4501 sbs = gen_to->bs; 4502 rstarts = gen_from->starts; 4503 rprocs = gen_from->procs; 4504 rbs = gen_from->bs; 4505 4506 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4507 if (scall == MAT_INITIAL_MATRIX){ 4508 /* i-array */ 4509 /*---------*/ 4510 /* post receives */ 4511 for (i=0; i<nrecvs; i++){ 4512 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4513 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4514 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4515 } 4516 4517 /* pack the outgoing message */ 4518 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 4519 rstartsj = sstartsj + nsends +1; 4520 sstartsj[0] = 0; rstartsj[0] = 0; 4521 len = 0; /* total length of j or a array to be sent */ 4522 k = 0; 4523 for (i=0; i<nsends; i++){ 4524 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4525 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4526 for (j=0; j<nrows; j++) { 4527 row = srow[k] + B->rmap.range[rank]; /* global row idx */ 4528 for (l=0; l<sbs; l++){ 4529 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4530 rowlen[j*sbs+l] = ncols; 4531 len += ncols; 4532 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4533 } 4534 k++; 4535 } 4536 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4537 sstartsj[i+1] = len; 
/* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4538 } 4539 /* recvs and sends of i-array are completed */ 4540 i = nrecvs; 4541 while (i--) { 4542 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4543 } 4544 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4545 4546 /* allocate buffers for sending j and a arrays */ 4547 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4548 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4549 4550 /* create i-array of B_oth */ 4551 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4552 b_othi[0] = 0; 4553 len = 0; /* total length of j or a array to be received */ 4554 k = 0; 4555 for (i=0; i<nrecvs; i++){ 4556 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4557 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 4558 for (j=0; j<nrows; j++) { 4559 b_othi[k+1] = b_othi[k] + rowlen[j]; 4560 len += rowlen[j]; k++; 4561 } 4562 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4563 } 4564 4565 /* allocate space for j and a arrrays of B_oth */ 4566 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4567 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 4568 4569 /* j-array */ 4570 /*---------*/ 4571 /* post receives of j-array */ 4572 for (i=0; i<nrecvs; i++){ 4573 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4574 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4575 } 4576 4577 /* pack the outgoing message j-array */ 4578 k = 0; 4579 for (i=0; i<nsends; i++){ 4580 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4581 bufJ = bufj+sstartsj[i]; 4582 for (j=0; j<nrows; j++) { 4583 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 4584 for (ll=0; ll<sbs; ll++){ 4585 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4586 for (l=0; l<ncols; l++){ 4587 *bufJ++ = cols[l]; 4588 } 4589 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4590 } 4591 } 4592 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4593 } 4594 4595 /* recvs and sends of j-array are completed */ 4596 i = nrecvs; 4597 while (i--) { 4598 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4599 } 4600 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4601 } else if (scall == MAT_REUSE_MATRIX){ 4602 sstartsj = *startsj; 4603 rstartsj = sstartsj + nsends +1; 4604 bufa = *bufa_ptr; 4605 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4606 b_otha = b_oth->a; 4607 } else { 4608 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 4609 } 4610 4611 /* a-array */ 4612 /*---------*/ 4613 /* post receives of a-array */ 4614 for (i=0; i<nrecvs; i++){ 4615 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4616 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4617 } 4618 4619 /* pack the outgoing message a-array */ 4620 k = 0; 4621 for (i=0; i<nsends; i++){ 4622 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4623 bufA = bufa+sstartsj[i]; 4624 for (j=0; j<nrows; j++) { 4625 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 4626 for (ll=0; ll<sbs; ll++){ 4627 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4628 for (l=0; l<ncols; l++){ 4629 *bufA++ = 
#undef __FUNCT__
#define __FUNCT__ "MatGetCommunicationStructs"
/*@C
    MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

    Not Collective

    Input Parameter:
.   A - the matrix in MPIAIJ format

    Output Parameters:
+   lvec - the local vector holding off-process values from the argument to a matrix-vector product
.   colmap - a map from global column index to local index into lvec
-   multScatter - a scatter from the argument of a matrix-vector product to lvec

    Level: developer

@*/
#if defined (PETSC_USE_CTABLE)
PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A,Vec *lvec,PetscTable *colmap,VecScatter *multScatter)
#else
PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A,Vec *lvec,PetscInt *colmap[],VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidPointer(lvec,2);
  PetscValidPointer(colmap,3);
  PetscValidPointer(multScatter,4);
  a = (Mat_MPIAIJ*)A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}
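/*
   Example usage (a sketch): pulling out the scatter that MatMult() uses internally,
   e.g. to replicate the halo exchange for a user-defined kernel. A is assumed to be
   of type MPIAIJ and x a vector with layout compatible with A's columns; under
   PETSC_USE_CTABLE the colmap argument must be a PetscTable* instead of a PetscInt**.

     Vec        lvec;
     VecScatter Mvctx;
     PetscInt   *colmap;

     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
     ierr = VecScatterBegin(x,lvec,INSERT_VALUES,SCATTER_FORWARD,Mvctx);CHKERRQ(ierr);
     ierr = VecScatterEnd(x,lvec,INSERT_VALUES,SCATTER_FORWARD,Mvctx);CHKERRQ(ierr);
*/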
EXTERN_C_BEGIN
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*);
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*);
EXTERN_C_END

#include "src/mat/impls/dense/mpi/mpidense.h"

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
/*
    Computes C = A*B as C = (B'*A')', since forming the MPIDense*MPIAIJ product directly is untenable

               n                 p                       p
        (             )     (         )          (             )
      m (      A      )  * n(    B    )   =    m(      C      )
        (             )     (         )          (             )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *sub_c = (Mat_MPIDense*)C->data;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(At);CHKERRQ(ierr);
  ierr = MatDestroy(Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(Ct);CHKERRQ(ierr); /* Ct is no longer needed once its transpose has been placed in C */

  C->assembled = PETSC_TRUE;
  ierr = MatAssemblyBegin(sub_c->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(sub_c->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (!C->was_assembled) {
    ierr = MatSetUpMultiply_MPIDense(C);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m = A->rmap.n,n = B->cmap.n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap.n != B->rmap.n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap.n %d != B->rmap.n %d\n",A->cmap.n,B->rmap.n);
  ierr = MatCreate(A->hdr.comm,&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
  Cmat->assembled = PETSC_TRUE;
  *C = Cmat;
  PetscFunctionReturn(0);
}
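/*
   Usage note (a sketch, not from the PETSc sources): the symbolic/numeric pair above
   is normally reached through the generic interface rather than called directly; the
   wrapper below is composed on MPIAIJ matrices in MatCreate_MPIAIJ() under the name
   "MatMatMult_mpidense_mpiaij_C". For an MPIDense A and an MPIAIJ B of compatible
   sizes one would write

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ...
     ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/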
/* ----------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJ()
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);

  ierr         = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data      = (void*)b;
  ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->factor    = 0;
  B->rmap.bs   = 1;
  B->assembled = PETSC_FALSE;
  B->mapping   = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPIAIJ",
                                           MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPIAIJ",
                                           MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIAIJ",
                                           MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                           "MatIsTranspose_MPIAIJ",
                                           MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                           "MatMPIAIJSetPreallocation_MPIAIJ",
                                           MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
                                           "MatMPIAIJSetPreallocationCSR_MPIAIJ",
                                           MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                           "MatDiagonalScaleLocal_MPIAIJ",
                                           MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
                                           "MatConvert_MPIAIJ_MPICSRPERM",
                                           MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
                                           "MatConvert_MPIAIJ_MPICRL",
                                           MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
                                           "MatMatMult_MPIDense_MPIAIJ",
                                           MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
                                           "MatMatMultSymbolic_MPIDense_MPIAIJ",
                                           MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
                                           "MatMatMultNumeric_MPIDense_MPIAIJ",
                                           MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
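/*
   Usage note (a sketch): the constructor above is registered under the type name
   "mpiaij" and is reached through the standard creation sequence; the preallocation
   numbers below (5 diagonal and 2 off-diagonal nonzeros per row) are illustrative
   only, and M, N are global sizes assumed chosen by the caller.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/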
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@
     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - local number of columns; this should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it calculated
       if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices for the "diagonal" portion of the matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for the "off-diagonal" portion of the matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.

       The i and j indices are 0 based

       See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
                                                                 PetscInt oi[],PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (oi[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  }
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*)(*mat)->data;
  maij->donotstash     = PETSC_TRUE;
  (*mat)->preallocated = PETSC_TRUE;

  (*mat)->rmap.bs = (*mat)->cmap.bs = 1;
  ierr = PetscMapSetUp(&(*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscMapSetUp(&(*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
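/*
   Example (a sketch with made-up data): on two processes, each owning one row and one
   column of a 2x2 global matrix, the "diagonal" block holds the locally owned column
   and the "off-diagonal" block holds the column owned by the other process. Note that
   the oj[] indices are global column indices, while the dj[] indices are local to the
   diagonal block; rank is this process's rank in PETSC_COMM_WORLD.

     PetscInt    di[2] = {0,1},dj[1] = {0};
     PetscScalar da[1] = {2.0};
     PetscInt    oi[2] = {0,1},oj[1];
     PetscScalar oa[1] = {-1.0};
     Mat         A;

     oj[0] = rank ? 0 : 1;    global column owned by the other process
     ierr  = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,2,2,di,dj,da,oi,oj,oa,&A);CHKERRQ(ierr);

   Since the arrays are not copied they must remain valid for the life of A.
*/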
/*
    Special version for direct calls from Fortran
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
#undef SETERRQ2
#define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
#undef SETERRQ
#define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatPreallocated(mat);
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  }
#endif
  {
    PetscInt   i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
    PetscInt   cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
    PetscTruth roworiented = aij->roworiented;

    /* Some variables required in the macro */
    Mat        A = aij->A;
    Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa = a->a;
    PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B = aij->B;
    Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
    MatScalar  *ba = b->a;

    PetscInt   *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar  *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined (PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap.n;
                ba    = b->a;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          }
        }
      } else {
        if (!aij->donotstash) {
          if (roworiented) {
            if (ignorezeroentries && v[i*n] == 0.0) continue;
            ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
          } else {
            if (ignorezeroentries && v[i] == 0.0) continue;
            ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
EXTERN_C_END