#define PETSCMAT_DLL

#include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "../src/inline/spops.h"

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
  Distributes a SeqAIJ matrix across a set of processes. Code stolen from
  MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

  Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscTruth     aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
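      /* ld[i] counts the nonzeros of local row i that lie to the left of this
         process's diagonal block; it is stashed in the matrix (Mat_MPIAIJ->ld) so
         that a later MAT_REUSE_MATRIX call can scatter new numerical values
         directly into the A (diagonal) and B (off-diagonal) parts below */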
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
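
/*
  Layout reminder: an MPIAIJ matrix stores its locally owned rows in two SeqAIJ
  matrices. A holds the "diagonal block" (the columns this process owns) using
  local column indices, while B holds the remaining (off-diagonal) entries with
  compressed column indices; garray[] maps a compressed B column index back to
  its global column number.
*/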

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each
  process has an order-N integer array) but it is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}


#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
  if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
  lastcol1 = col; \
  while (high1-low1 > 5) { \
    t = (low1+high1)/2; \
    if (rp1[t] > col) high1 = t; \
    else              low1  = t; \
  } \
  for (_i=low1; _i<high1; _i++) { \
    if (rp1[_i] > col) break; \
    if (rp1[_i] == col) { \
      if (addv == ADD_VALUES) ap1[_i] += value; \
      else                    ap1[_i]  = value; \
      goto a_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
  if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
  if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
  N = nrow1++ - 1; a->nz++; high1++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp1[ii+1] = rp1[ii]; \
    ap1[ii+1] = ap1[ii]; \
  } \
  rp1[_i] = col; \
  ap1[_i] = value; \
  a_noinsert: ; \
  ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
  if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
  lastcol2 = col; \
  while (high2-low2 > 5) { \
    t = (low2+high2)/2; \
    if (rp2[t] > col) high2 = t; \
    else              low2  = t; \
  } \
  for (_i=low2; _i<high2; _i++) { \
    if (rp2[_i] > col) break; \
    if (rp2[_i] == col) { \
      if (addv == ADD_VALUES) ap2[_i] += value; \
      else                    ap2[_i]  = value; \
      goto b_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
  if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
  if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
  N = nrow2++ - 1; b->nz++; high2++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp2[ii+1] = rp2[ii]; \
    ap2[ii+1] = ap2[ii]; \
  } \
  rp2[_i] = col; \
  ap2[_i] = value; \
  b_noinsert: ; \
  bilen[row] = nrow2; \
}
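
/*
  MatSetValuesRow_MPIAIJ replaces an entire locally owned row. The values in v[]
  are assumed to be ordered the way the row is stored globally: first the
  off-diagonal entries to the left of the diagonal block, then the diagonal-block
  entries, then the off-diagonal entries to the right (see the three PetscMemcpy()
  calls below).
*/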
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscTruth ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew = a->nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
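              /* a new nonzero in an off-process column that B does not yet know
                 about: convert B back to global column indexing (disassemble) so
                 the entry can be inserted; B is compressed again at the next
                 assembly */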
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

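  /* off-process entries were cached in the stash by MatSetValues(); start sending
     them to their owning processes now, the receives are completed (and the values
     inserted) in MatAssemblyEnd_MPIAIJ() */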
  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ*)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */

  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr); aij->diag = 0;}
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
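
/*
  Rough outline of MatZeroRows_MPIAIJ(): the global row numbers in rows[] may refer
  to rows owned by other processes, so each process first sends every requested row
  index to the process that owns it; the owners then zero their local rows (and
  optionally set the diagonal) without any further communication.
*/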
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap->range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

     Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  }
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
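  /* the scatter of the needed off-process entries of xx was started before the
     local (diagonal block) product so that communication overlaps computation */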
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data;  Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec)  {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs enough space for the largest number of nonzeros any process has */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt**)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    if (mat->rmap->N > 1024) {
      PetscTruth flg = PETSC_FALSE;

      ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix,"-mat_ascii_output_large",&flg,PETSC_NULL);CHKERRQ(ierr);
      if (!flg) {
        SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
      }
    }

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;
  PetscTruth     hasop;

  PetscFunctionBegin;
  if (its > 1) {ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);}

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    ierr = (*mat->A->ops->relax)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
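      /* use the diagonal-block multiply when the implementation provides it;
         otherwise fall back to a pointwise multiply by the stored diagonal */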
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr = VecWAXPY(bb1,-1.0,bb1,bb);CHKERRQ(ierr);
    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(xx1);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       first,local_size,nrows;
  const PetscInt *rows;
  PetscMPIInt    size;
  IS             crowp,growp,irowp,lrowp,lcolp,icolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
  }
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
  ierr = ISSetPermutation(growp);CHKERRQ(ierr);
  if (pcomm!=comm) {
    ierr = ISDestroy(crowp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  /* get the local target indices */
  ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISDestroy(irowp);CHKERRQ(ierr);
  /* the column permutation is so much easier;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
  if (size==1) {
    lcolp = colp;
  } else {
    ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
  ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
  if (size>1) {
    ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISDestroy(lcolp);CHKERRQ(ierr);
  }
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  /* clean up */
  ierr = ISDestroy(lrowp);CHKERRQ(ierr);
  ierr = ISDestroy(icolp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
  Mat        A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

1473 if (!mat->rowvalues && (idx || v)) { 1474 /* 1475 allocate enough space to hold information from the longest row. 1476 */ 1477 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1478 PetscInt max = 1,tmp; 1479 for (i=0; i<matin->rmap->n; i++) { 1480 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1481 if (max < tmp) { max = tmp; } 1482 } 1483 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1484 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1485 } 1486 1487 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1488 lrow = row - rstart; 1489 1490 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1491 if (!v) {pvA = 0; pvB = 0;} 1492 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1493 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1494 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1495 nztot = nzA + nzB; 1496 1497 cmap = mat->garray; 1498 if (v || idx) { 1499 if (nztot) { 1500 /* Sort by increasing column numbers, assuming A and B already sorted */ 1501 PetscInt imark = -1; 1502 if (v) { 1503 *v = v_p = mat->rowvalues; 1504 for (i=0; i<nzB; i++) { 1505 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1506 else break; 1507 } 1508 imark = i; 1509 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1510 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1511 } 1512 if (idx) { 1513 *idx = idx_p = mat->rowindices; 1514 if (imark > -1) { 1515 for (i=0; i<imark; i++) { 1516 idx_p[i] = cmap[cworkB[i]]; 1517 } 1518 } else { 1519 for (i=0; i<nzB; i++) { 1520 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1521 else break; 1522 } 1523 imark = i; 1524 } 1525 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1526 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1527 } 1528 } else { 1529 if (idx) *idx = 0; 1530 if (v) *v = 0; 1531 } 1532 } 1533 *nz = nztot; 1534 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1535 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1536 PetscFunctionReturn(0); 1537 } 1538 1539 #undef __FUNCT__ 1540 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1541 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1542 { 1543 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1544 1545 PetscFunctionBegin; 1546 if (!aij->getrowactive) { 1547 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1548 } 1549 aij->getrowactive = PETSC_FALSE; 1550 PetscFunctionReturn(0); 1551 } 1552 1553 #undef __FUNCT__ 1554 #define __FUNCT__ "MatNorm_MPIAIJ" 1555 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1556 { 1557 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1558 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1559 PetscErrorCode ierr; 1560 PetscInt i,j,cstart = mat->cmap->rstart; 1561 PetscReal sum = 0.0; 1562 MatScalar *v; 1563 1564 PetscFunctionBegin; 1565 if (aij->size == 1) { 1566 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1567 } else { 1568 if (type == NORM_FROBENIUS) { 1569 v = amat->a; 1570 for (i=0; i<amat->nz; i++) { 1571 #if defined(PETSC_USE_COMPLEX) 1572 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1573 #else 1574 sum += (*v)*(*v); v++; 1575 #endif 1576 } 1577 v = bmat->a; 1578 for (i=0; i<bmat->nz; i++) { 1579 #if defined(PETSC_USE_COMPLEX) 1580 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1581 #else 1582 sum += (*v)*(*v); v++; 1583 
#endif 1584 } 1585 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1586 *norm = sqrt(*norm); 1587 } else if (type == NORM_1) { /* max column norm */ 1588 PetscReal *tmp,*tmp2; 1589 PetscInt *jj,*garray = aij->garray; 1590 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1591 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1592 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1593 *norm = 0.0; 1594 v = amat->a; jj = amat->j; 1595 for (j=0; j<amat->nz; j++) { 1596 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1597 } 1598 v = bmat->a; jj = bmat->j; 1599 for (j=0; j<bmat->nz; j++) { 1600 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1601 } 1602 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1603 for (j=0; j<mat->cmap->N; j++) { 1604 if (tmp2[j] > *norm) *norm = tmp2[j]; 1605 } 1606 ierr = PetscFree(tmp);CHKERRQ(ierr); 1607 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1608 } else if (type == NORM_INFINITY) { /* max row norm */ 1609 PetscReal ntemp = 0.0; 1610 for (j=0; j<aij->A->rmap->n; j++) { 1611 v = amat->a + amat->i[j]; 1612 sum = 0.0; 1613 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1614 sum += PetscAbsScalar(*v); v++; 1615 } 1616 v = bmat->a + bmat->i[j]; 1617 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1618 sum += PetscAbsScalar(*v); v++; 1619 } 1620 if (sum > ntemp) ntemp = sum; 1621 } 1622 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1623 } else { 1624 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1625 } 1626 } 1627 PetscFunctionReturn(0); 1628 } 1629 1630 #undef __FUNCT__ 1631 #define __FUNCT__ "MatTranspose_MPIAIJ" 1632 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1633 { 1634 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1635 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1636 PetscErrorCode ierr; 1637 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1638 PetscInt cstart=A->cmap->rstart,ncol; 1639 Mat B; 1640 MatScalar *array; 1641 1642 PetscFunctionBegin; 1643 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1644 1645 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1646 ai = Aloc->i; aj = Aloc->j; 1647 bi = Bloc->i; bj = Bloc->j; 1648 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1649 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1650 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1651 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1652 for (i=0; i<ai[ma]; i++){ 1653 d_nnz[aj[i]] ++; 1654 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1655 } 1656 1657 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1658 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1659 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1660 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1661 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1662 } else { 1663 B = *matout; 1664 } 1665 1666 /* copy over the A part */ 1667 array = Aloc->a; 1668 row = A->rmap->rstart; 1669 for (i=0; i<ma; i++) { 1670 ncol = ai[i+1]-ai[i]; 1671 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1672 row++; array += ncol; aj += ncol; 1673 } 1674 aj = Aloc->j; 1675 for (i=0; 
i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1676 1677 /* copy over the B part */ 1678 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1679 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1680 array = Bloc->a; 1681 row = A->rmap->rstart; 1682 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1683 cols_tmp = cols; 1684 for (i=0; i<mb; i++) { 1685 ncol = bi[i+1]-bi[i]; 1686 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1687 row++; array += ncol; cols_tmp += ncol; 1688 } 1689 ierr = PetscFree(cols);CHKERRQ(ierr); 1690 1691 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1692 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1693 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1694 *matout = B; 1695 } else { 1696 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1697 } 1698 PetscFunctionReturn(0); 1699 } 1700 1701 #undef __FUNCT__ 1702 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1703 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1704 { 1705 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1706 Mat a = aij->A,b = aij->B; 1707 PetscErrorCode ierr; 1708 PetscInt s1,s2,s3; 1709 1710 PetscFunctionBegin; 1711 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1712 if (rr) { 1713 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1714 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1715 /* Overlap communication with computation. */ 1716 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1717 } 1718 if (ll) { 1719 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1720 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1721 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1722 } 1723 /* scale the diagonal block */ 1724 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1725 1726 if (rr) { 1727 /* Do a scatter end and then right scale the off-diagonal block */ 1728 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1729 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1730 } 1731 1732 PetscFunctionReturn(0); 1733 } 1734 1735 #undef __FUNCT__ 1736 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1737 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1738 { 1739 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1740 PetscErrorCode ierr; 1741 1742 PetscFunctionBegin; 1743 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1744 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1745 PetscFunctionReturn(0); 1746 } 1747 #undef __FUNCT__ 1748 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1749 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1750 { 1751 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1752 PetscErrorCode ierr; 1753 1754 PetscFunctionBegin; 1755 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1756 PetscFunctionReturn(0); 1757 } 1758 1759 #undef __FUNCT__ 1760 #define __FUNCT__ "MatEqual_MPIAIJ" 1761 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1762 { 1763 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1764 Mat a,b,c,d; 1765 PetscTruth flg; 1766 PetscErrorCode ierr; 1767 1768 PetscFunctionBegin; 1769 a = matA->A; b = matA->B; 1770 c = matB->A; d = matB->B; 1771 1772 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1773 if (flg) { 1774 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1775 } 1776 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1777 PetscFunctionReturn(0); 1778 } 1779 1780 #undef __FUNCT__ 
1781 #define __FUNCT__ "MatCopy_MPIAIJ" 1782 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1783 { 1784 PetscErrorCode ierr; 1785 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1786 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1787 1788 PetscFunctionBegin; 1789 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1790 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1791 /* because of the column compression in the off-processor part of the matrix a->B, 1792 the number of columns in a->B and b->B may be different, hence we cannot call 1793 the MatCopy() directly on the two parts. If need be, we can provide a more 1794 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1795 then copying the submatrices */ 1796 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1797 } else { 1798 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1799 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1800 } 1801 PetscFunctionReturn(0); 1802 } 1803 1804 #undef __FUNCT__ 1805 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1806 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1807 { 1808 PetscErrorCode ierr; 1809 1810 PetscFunctionBegin; 1811 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1812 PetscFunctionReturn(0); 1813 } 1814 1815 #include "petscblaslapack.h" 1816 #undef __FUNCT__ 1817 #define __FUNCT__ "MatAXPY_MPIAIJ" 1818 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1819 { 1820 PetscErrorCode ierr; 1821 PetscInt i; 1822 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1823 PetscBLASInt bnz,one=1; 1824 Mat_SeqAIJ *x,*y; 1825 1826 PetscFunctionBegin; 1827 if (str == SAME_NONZERO_PATTERN) { 1828 PetscScalar alpha = a; 1829 x = (Mat_SeqAIJ *)xx->A->data; 1830 y = (Mat_SeqAIJ *)yy->A->data; 1831 bnz = PetscBLASIntCast(x->nz); 1832 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1833 x = (Mat_SeqAIJ *)xx->B->data; 1834 y = (Mat_SeqAIJ *)yy->B->data; 1835 bnz = PetscBLASIntCast(x->nz); 1836 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1837 } else if (str == SUBSET_NONZERO_PATTERN) { 1838 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1839 1840 x = (Mat_SeqAIJ *)xx->B->data; 1841 y = (Mat_SeqAIJ *)yy->B->data; 1842 if (y->xtoy && y->XtoY != xx->B) { 1843 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1844 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1845 } 1846 if (!y->xtoy) { /* get xtoy */ 1847 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1848 y->XtoY = xx->B; 1849 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1850 } 1851 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1852 } else { 1853 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1854 } 1855 PetscFunctionReturn(0); 1856 } 1857 1858 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1859 1860 #undef __FUNCT__ 1861 #define __FUNCT__ "MatConjugate_MPIAIJ" 1862 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1863 { 1864 #if defined(PETSC_USE_COMPLEX) 1865 PetscErrorCode ierr; 1866 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1867 1868 PetscFunctionBegin; 1869 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1870 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1871 #else 1872 PetscFunctionBegin; 1873 #endif 1874 PetscFunctionReturn(0); 1875 } 1876 1877 #undef __FUNCT__ 1878 #define __FUNCT__ "MatRealPart_MPIAIJ" 1879 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1880 { 1881 
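  /*
     MatRealPart() and MatImaginaryPart() only modify stored values, so the MPIAIJ implementation simply forwards to the sequential diagonal block a->A and off-diagonal block a->B; no communication is needed.
  */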
Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1882 PetscErrorCode ierr; 1883 1884 PetscFunctionBegin; 1885 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1886 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1887 PetscFunctionReturn(0); 1888 } 1889 1890 #undef __FUNCT__ 1891 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1892 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1893 { 1894 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1895 PetscErrorCode ierr; 1896 1897 PetscFunctionBegin; 1898 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1899 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1900 PetscFunctionReturn(0); 1901 } 1902 1903 #ifdef PETSC_HAVE_PBGL 1904 1905 #include <boost/parallel/mpi/bsp_process_group.hpp> 1906 #include <boost/graph/distributed/ilu_default_graph.hpp> 1907 #include <boost/graph/distributed/ilu_0_block.hpp> 1908 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1909 #include <boost/graph/distributed/petsc/interface.hpp> 1910 #include <boost/multi_array.hpp> 1911 #include <boost/parallel/distributed_property_map.hpp> 1912 1913 #undef __FUNCT__ 1914 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1915 /* 1916 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1917 */ 1918 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1919 { 1920 namespace petsc = boost::distributed::petsc; 1921 1922 namespace graph_dist = boost::graph::distributed; 1923 using boost::graph::distributed::ilu_default::process_group_type; 1924 using boost::graph::ilu_permuted; 1925 1926 PetscTruth row_identity, col_identity; 1927 PetscContainer c; 1928 PetscInt m, n, M, N; 1929 PetscErrorCode ierr; 1930 1931 PetscFunctionBegin; 1932 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1933 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1934 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1935 if (!row_identity || !col_identity) { 1936 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1937 } 1938 1939 process_group_type pg; 1940 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1941 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1942 lgraph_type& level_graph = *lgraph_p; 1943 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1944 1945 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1946 ilu_permuted(level_graph); 1947 1948 /* put together the new matrix */ 1949 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 1950 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1951 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1952 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 1953 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 1954 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1955 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1956 1957 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr); 1958 ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr); 1959 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr); 1960 PetscFunctionReturn(0); 1961 } 1962 1963 #undef __FUNCT__ 1964 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1965 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 1966 { 1967 PetscFunctionBegin; 1968 PetscFunctionReturn(0); 1969 } 1970 1971 #undef __FUNCT__ 1972 #define __FUNCT__ "MatSolve_MPIAIJ" 1973 /* 1974 This uses the parallel
ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1975 */ 1976 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1977 { 1978 namespace graph_dist = boost::graph::distributed; 1979 1980 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1981 lgraph_type* lgraph_p; 1982 PetscContainer c; 1983 PetscErrorCode ierr; 1984 1985 PetscFunctionBegin; 1986 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1987 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1988 ierr = VecCopy(b, x);CHKERRQ(ierr); 1989 1990 PetscScalar* array_x; 1991 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1992 PetscInt sx; 1993 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 1994 1995 PetscScalar* array_b; 1996 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 1997 PetscInt sb; 1998 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 1999 2000 lgraph_type& level_graph = *lgraph_p; 2001 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2002 2003 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2004 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2005 ref_x(array_x, boost::extents[num_vertices(graph)]); 2006 2007 typedef boost::iterator_property_map<array_ref_type::iterator, 2008 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2009 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2010 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2011 2012 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2013 ierr = VecRestoreArray(b, &array_b);CHKERRQ(ierr); ierr = VecRestoreArray(x, &array_x);CHKERRQ(ierr); 2014 PetscFunctionReturn(0); 2015 } 2016 #endif 2017 2018 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2019 PetscInt nzlocal,nsends,nrecvs; 2020 PetscMPIInt *send_rank; 2021 PetscInt *sbuf_nz,*sbuf_j,**rbuf_j; 2022 PetscScalar *sbuf_a,**rbuf_a; 2023 PetscErrorCode (*MatDestroy)(Mat); 2024 } Mat_Redundant; 2025 2026 #undef __FUNCT__ 2027 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2028 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2029 { 2030 PetscErrorCode ierr; 2031 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2032 PetscInt i; 2033 2034 PetscFunctionBegin; 2035 ierr = PetscFree(redund->send_rank);CHKERRQ(ierr); 2036 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2037 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2038 for (i=0; i<redund->nrecvs; i++){ 2039 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2040 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2041 } 2042 ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2043 ierr = PetscFree(redund);CHKERRQ(ierr); 2044 PetscFunctionReturn(0); 2045 } 2046 2047 #undef __FUNCT__ 2048 #define __FUNCT__ "MatDestroy_MatRedundant" 2049 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2050 { 2051 PetscErrorCode ierr; 2052 PetscContainer container; 2053 Mat_Redundant *redund=PETSC_NULL; 2054 2055 PetscFunctionBegin; 2056 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2057 if (container) { 2058 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2059 } else { 2060 SETERRQ(PETSC_ERR_PLIB,"Container does not exist"); 2061 } 2062 A->ops->destroy = redund->MatDestroy; 2063 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2064 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2065 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 2066 PetscFunctionReturn(0); 2067 } 2068 2069 #undef __FUNCT__ 2070 #define __FUNCT__
"MatGetRedundantMatrix_MPIAIJ" 2071 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2072 { 2073 PetscMPIInt rank,size; 2074 MPI_Comm comm=((PetscObject)mat)->comm; 2075 PetscErrorCode ierr; 2076 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2077 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2078 PetscInt *rowrange=mat->rmap->range; 2079 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2080 Mat A=aij->A,B=aij->B,C=*matredundant; 2081 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2082 PetscScalar *sbuf_a; 2083 PetscInt nzlocal=a->nz+b->nz; 2084 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2085 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2086 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2087 MatScalar *aworkA,*aworkB; 2088 PetscScalar *vals; 2089 PetscMPIInt tag1,tag2,tag3,imdex; 2090 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2091 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2092 MPI_Status recv_status,*send_status; 2093 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2094 PetscInt **rbuf_j=PETSC_NULL; 2095 PetscScalar **rbuf_a=PETSC_NULL; 2096 Mat_Redundant *redund=PETSC_NULL; 2097 PetscContainer container; 2098 2099 PetscFunctionBegin; 2100 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2101 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2102 2103 if (reuse == MAT_REUSE_MATRIX) { 2104 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2105 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2106 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2107 if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2108 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2109 if (container) { 2110 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2111 } else { 2112 SETERRQ(PETSC_ERR_PLIB,"Container does not exist"); 2113 } 2114 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix.
Wrong nzlocal"); 2115 2116 nsends = redund->nsends; 2117 nrecvs = redund->nrecvs; 2118 send_rank = redund->send_rank; recv_rank = send_rank + size; 2119 sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends; 2120 sbuf_j = redund->sbuf_j; 2121 sbuf_a = redund->sbuf_a; 2122 rbuf_j = redund->rbuf_j; 2123 rbuf_a = redund->rbuf_a; 2124 } 2125 2126 if (reuse == MAT_INITIAL_MATRIX){ 2127 PetscMPIInt subrank,subsize; 2128 PetscInt nleftover,np_subcomm; 2129 /* get the destination processors' id send_rank, nsends and nrecvs */ 2130 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2131 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2132 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 2133 recv_rank = send_rank + size; 2134 np_subcomm = size/nsubcomm; 2135 nleftover = size - nsubcomm*np_subcomm; 2136 nsends = 0; nrecvs = 0; 2137 for (i=0; i<size; i++){ /* i=rank*/ 2138 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2139 send_rank[nsends] = i; nsends++; 2140 recv_rank[nrecvs++] = i; 2141 } 2142 } 2143 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2144 i = size-nleftover-1; 2145 j = 0; 2146 while (j < nsubcomm - nleftover){ 2147 send_rank[nsends++] = i; 2148 i--; j++; 2149 } 2150 } 2151 2152 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2153 for (i=0; i<nleftover; i++){ 2154 recv_rank[nrecvs++] = size-nleftover+i; 2155 } 2156 } 2157 2158 /* allocate sbuf_j, sbuf_a */ 2159 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2160 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2161 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2162 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2163 2164 /* copy mat's local entries into the buffers */ 2165 if (reuse == MAT_INITIAL_MATRIX){ 2166 rownz_max = 0; 2167 rptr = sbuf_j; 2168 cols = sbuf_j + rend-rstart + 1; 2169 vals = sbuf_a; 2170 rptr[0] = 0; 2171 for (i=0; i<rend-rstart; i++){ 2172 row = i + rstart; 2173 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2174 ncols = nzA + nzB; 2175 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2176 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2177 /* load the column indices for this row into cols */ 2178 lwrite = 0; 2179 for (l=0; l<nzB; l++) { 2180 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2181 vals[lwrite] = aworkB[l]; 2182 cols[lwrite++] = ctmp; 2183 } 2184 } 2185 for (l=0; l<nzA; l++){ 2186 vals[lwrite] = aworkA[l]; 2187 cols[lwrite++] = cstart + cworkA[l]; 2188 } 2189 for (l=0; l<nzB; l++) { 2190 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2191 vals[lwrite] = aworkB[l]; 2192 cols[lwrite++] = ctmp; 2193 } 2194 } 2195 vals += ncols; 2196 cols += ncols; 2197 rptr[i+1] = rptr[i] + ncols; 2198 if (rownz_max < ncols) rownz_max = ncols; 2199 } 2200 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2201 } else { /* only copy matrix values into sbuf_a */ 2202 rptr = sbuf_j; 2203 vals = sbuf_a; 2204 rptr[0] = 0; 2205 for (i=0; i<rend-rstart; i++){ 2206 row = i + rstart; 2207 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2208 ncols = nzA + nzB; 2209 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2210 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2211 lwrite = 0; 2212 for (l=0; l<nzB; l++) { 2213 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2214 } 2215 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2216 for (l=0; l<nzB; l++) { 
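        /* last, the off-diagonal values whose global column is at or beyond cend; writing lower-B, then A, then upper-B keeps each row's values in ascending global column order, matching the column indices packed in the MAT_INITIAL_MATRIX pass */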
2217 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2218 } 2219 vals += ncols; 2220 rptr[i+1] = rptr[i] + ncols; 2221 } 2222 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2223 2224 /* send nzlocal to others, and recv other's nzlocal */ 2225 /*--------------------------------------------------*/ 2226 if (reuse == MAT_INITIAL_MATRIX){ 2227 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2228 s_waits2 = s_waits3 + nsends; 2229 s_waits1 = s_waits2 + nsends; 2230 r_waits1 = s_waits1 + nsends; 2231 r_waits2 = r_waits1 + nrecvs; 2232 r_waits3 = r_waits2 + nrecvs; 2233 } else { 2234 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2235 r_waits3 = s_waits3 + nsends; 2236 } 2237 2238 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2239 if (reuse == MAT_INITIAL_MATRIX){ 2240 /* get new tags to keep the communication clean */ 2241 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2242 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2243 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2244 rbuf_nz = sbuf_nz + nsends; 2245 2246 /* post receives of other's nzlocal */ 2247 for (i=0; i<nrecvs; i++){ 2248 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2249 } 2250 /* send nzlocal to others */ 2251 for (i=0; i<nsends; i++){ 2252 sbuf_nz[i] = nzlocal; 2253 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2254 } 2255 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2256 count = nrecvs; 2257 while (count) { 2258 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2259 recv_rank[imdex] = recv_status.MPI_SOURCE; 2260 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2261 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2262 2263 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2264 rbuf_nz[imdex] += i + 2; 2265 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2266 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2267 count--; 2268 } 2269 /* wait on sends of nzlocal */ 2270 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2271 /* send mat->i,j to others, and recv from other's */ 2272 /*------------------------------------------------*/ 2273 for (i=0; i<nsends; i++){ 2274 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2275 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2276 } 2277 /* wait on receives of mat->i,j */ 2278 /*------------------------------*/ 2279 count = nrecvs; 2280 while (count) { 2281 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2282 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2283 count--; 2284 } 2285 /* wait on sends of mat->i,j */ 2286 /*---------------------------*/ 2287 if (nsends) { 2288 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2289 } 2290 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2291 2292 /* post receives, send and receive mat->a */ 2293 /*----------------------------------------*/ 
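  /*
     The numerical values are exchanged with the usual nonblocking pattern: all receives into rbuf_a[] are posted before the matching sends of sbuf_a (tag3), MPI_Waitany() drains the receives in completion order, and MPI_Waitall() completes the sends before the buffers are reused on the next call.
  */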
2294 for (imdex=0; imdex<nrecvs; imdex++) { 2295 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2296 } 2297 for (i=0; i<nsends; i++){ 2298 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2299 } 2300 count = nrecvs; 2301 while (count) { 2302 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2303 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2304 count--; 2305 } 2306 if (nsends) { 2307 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2308 } 2309 2310 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2311 2312 /* create redundant matrix */ 2313 /*-------------------------*/ 2314 if (reuse == MAT_INITIAL_MATRIX){ 2315 /* compute rownz_max for preallocation */ 2316 for (imdex=0; imdex<nrecvs; imdex++){ 2317 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2318 rptr = rbuf_j[imdex]; 2319 for (i=0; i<j; i++){ 2320 ncols = rptr[i+1] - rptr[i]; 2321 if (rownz_max < ncols) rownz_max = ncols; 2322 } 2323 } 2324 2325 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2326 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2327 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2328 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2329 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2330 } else { 2331 C = *matredundant; 2332 } 2333 2334 /* insert local matrix entries */ 2335 rptr = sbuf_j; 2336 cols = sbuf_j + rend-rstart + 1; 2337 vals = sbuf_a; 2338 for (i=0; i<rend-rstart; i++){ 2339 row = i + rstart; 2340 ncols = rptr[i+1] - rptr[i]; 2341 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2342 vals += ncols; 2343 cols += ncols; 2344 } 2345 /* insert received matrix entries */ 2346 for (imdex=0; imdex<nrecvs; imdex++){ 2347 rstart = rowrange[recv_rank[imdex]]; 2348 rend = rowrange[recv_rank[imdex]+1]; 2349 rptr = rbuf_j[imdex]; 2350 cols = rbuf_j[imdex] + rend-rstart + 1; 2351 vals = rbuf_a[imdex]; 2352 for (i=0; i<rend-rstart; i++){ 2353 row = i + rstart; 2354 ncols = rptr[i+1] - rptr[i]; 2355 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2356 vals += ncols; 2357 cols += ncols; 2358 } 2359 } 2360 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2361 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2362 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2363 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2364 if (reuse == MAT_INITIAL_MATRIX){ 2365 PetscContainer container; 2366 *matredundant = C; 2367 /* create a supporting struct and attach it to C for reuse */ 2368 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2369 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2370 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2371 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2372 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2373 2374 redund->nzlocal = nzlocal; 2375 redund->nsends = nsends; 2376 redund->nrecvs = nrecvs; 2377 redund->send_rank = send_rank; 2378 redund->sbuf_nz = sbuf_nz; 2379 redund->sbuf_j = sbuf_j; 2380 redund->sbuf_a = sbuf_a; 2381 redund->rbuf_j = 
rbuf_j; 2382 redund->rbuf_a = rbuf_a; 2383 2384 redund->MatDestroy = C->ops->destroy; 2385 C->ops->destroy = MatDestroy_MatRedundant; 2386 } 2387 PetscFunctionReturn(0); 2388 } 2389 2390 #undef __FUNCT__ 2391 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2392 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2393 { 2394 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2395 PetscErrorCode ierr; 2396 PetscInt i,*idxb = 0; 2397 PetscScalar *va,*vb; 2398 Vec vtmp; 2399 2400 PetscFunctionBegin; 2401 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2402 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2403 if (idx) { 2404 for (i=0; i<A->rmap->n; i++) { 2405 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2406 } 2407 } 2408 2409 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2410 if (idx) { 2411 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2412 } 2413 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2414 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2415 2416 for (i=0; i<A->rmap->n; i++){ 2417 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2418 va[i] = vb[i]; 2419 if (idx) idx[i] = a->garray[idxb[i]]; 2420 } 2421 } 2422 2423 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2424 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2425 if (idxb) { 2426 ierr = PetscFree(idxb);CHKERRQ(ierr); 2427 } 2428 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2429 PetscFunctionReturn(0); 2430 } 2431 2432 #undef __FUNCT__ 2433 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2434 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2435 { 2436 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2437 PetscErrorCode ierr; 2438 PetscInt i,*idxb = 0; 2439 PetscScalar *va,*vb; 2440 Vec vtmp; 2441 2442 PetscFunctionBegin; 2443 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2444 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2445 if (idx) { 2446 for (i=0; i<A->rmap->n; i++) { 2447 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2448 } 2449 } 2450 2451 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2452 if (idx) { 2453 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2454 } 2455 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2456 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2457 2458 for (i=0; i<A->rmap->n; i++){ 2459 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2460 va[i] = vb[i]; 2461 if (idx) idx[i] = a->garray[idxb[i]]; 2462 } 2463 } 2464 2465 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2466 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2467 if (idxb) { 2468 ierr = PetscFree(idxb);CHKERRQ(ierr); 2469 } 2470 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2471 PetscFunctionReturn(0); 2472 } 2473 2474 #undef __FUNCT__ 2475 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2476 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2477 { 2478 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2479 PetscInt n = A->rmap->n; 2480 PetscInt cstart = A->cmap->rstart; 2481 PetscInt *cmap = mat->garray; 2482 PetscInt *diagIdx, *offdiagIdx; 2483 Vec diagV, offdiagV; 2484 PetscScalar *a, *diagA, *offdiagA; 2485 PetscInt r; 2486 PetscErrorCode ierr; 2487 2488 PetscFunctionBegin; 2489 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2490 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2491 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2492 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2493 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2494
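  /*
     Each block contributes a per-row candidate: diagV/diagIdx from the diagonal block (indices in local column numbering) and offdiagV/offdiagIdx from the off-diagonal block (compressed numbering). The loop below keeps, for every local row, the candidate of smaller magnitude and converts the winning index to global numbering, either by adding cstart or by mapping through garray.
  */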
ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2495 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2496 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2497 for(r = 0; r < n; ++r) { 2498 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2499 a[r] = diagA[r]; 2500 idx[r] = cstart + diagIdx[r]; 2501 } else { 2502 a[r] = offdiagA[r]; 2503 idx[r] = cmap[offdiagIdx[r]]; 2504 } 2505 } 2506 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2507 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2508 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2509 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2510 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2511 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2512 PetscFunctionReturn(0); 2513 } 2514 2515 #undef __FUNCT__ 2516 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2517 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2518 { 2519 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2520 PetscInt n = A->rmap->n; 2521 PetscInt cstart = A->cmap->rstart; 2522 PetscInt *cmap = mat->garray; 2523 PetscInt *diagIdx, *offdiagIdx; 2524 Vec diagV, offdiagV; 2525 PetscScalar *a, *diagA, *offdiagA; 2526 PetscInt r; 2527 PetscErrorCode ierr; 2528 2529 PetscFunctionBegin; 2530 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2531 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2532 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2533 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2534 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2535 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2536 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2537 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2538 for(r = 0; r < n; ++r) { 2539 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2540 a[r] = diagA[r]; 2541 idx[r] = cstart + diagIdx[r]; 2542 } else { 2543 a[r] = offdiagA[r]; 2544 idx[r] = cmap[offdiagIdx[r]]; 2545 } 2546 } 2547 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2548 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2549 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2550 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2551 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2552 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2553 PetscFunctionReturn(0); 2554 } 2555 2556 #undef __FUNCT__ 2557 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2558 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[]) 2559 { 2560 PetscErrorCode ierr; 2561 2562 PetscFunctionBegin; 2563 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 2564 PetscFunctionReturn(0); 2565 } 2566 2567 /* -------------------------------------------------------------------*/ 2568 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2569 MatGetRow_MPIAIJ, 2570 MatRestoreRow_MPIAIJ, 2571 MatMult_MPIAIJ, 2572 /* 4*/ MatMultAdd_MPIAIJ, 2573 MatMultTranspose_MPIAIJ, 2574 MatMultTransposeAdd_MPIAIJ, 2575 #ifdef PETSC_HAVE_PBGL 2576 MatSolve_MPIAIJ, 2577 #else 2578 0, 2579 #endif 2580 0, 2581 0, 2582 /*10*/ 0, 2583 0, 2584 0, 2585 MatRelax_MPIAIJ, 2586 MatTranspose_MPIAIJ, 2587 /*15*/ MatGetInfo_MPIAIJ, 2588 MatEqual_MPIAIJ, 2589 MatGetDiagonal_MPIAIJ, 2590 MatDiagonalScale_MPIAIJ, 2591 MatNorm_MPIAIJ, 2592 /*20*/ MatAssemblyBegin_MPIAIJ, 2593 MatAssemblyEnd_MPIAIJ, 2594 MatSetOption_MPIAIJ, 2595 MatZeroEntries_MPIAIJ, 2596 /*24*/ MatZeroRows_MPIAIJ, 2597 0, 2598 #ifdef PETSC_HAVE_PBGL 2599 0, 
2600 #else 2601 0, 2602 #endif 2603 0, 2604 0, 2605 /*29*/ MatSetUpPreallocation_MPIAIJ, 2606 #ifdef PETSC_HAVE_PBGL 2607 0, 2608 #else 2609 0, 2610 #endif 2611 0, 2612 0, 2613 0, 2614 /*34*/ MatDuplicate_MPIAIJ, 2615 0, 2616 0, 2617 0, 2618 0, 2619 /*39*/ MatAXPY_MPIAIJ, 2620 MatGetSubMatrices_MPIAIJ, 2621 MatIncreaseOverlap_MPIAIJ, 2622 MatGetValues_MPIAIJ, 2623 MatCopy_MPIAIJ, 2624 /*44*/ MatGetRowMax_MPIAIJ, 2625 MatScale_MPIAIJ, 2626 0, 2627 0, 2628 0, 2629 /*49*/ MatSetBlockSize_MPIAIJ, 2630 0, 2631 0, 2632 0, 2633 0, 2634 /*54*/ MatFDColoringCreate_MPIAIJ, 2635 0, 2636 MatSetUnfactored_MPIAIJ, 2637 MatPermute_MPIAIJ, 2638 0, 2639 /*59*/ MatGetSubMatrix_MPIAIJ, 2640 MatDestroy_MPIAIJ, 2641 MatView_MPIAIJ, 2642 0, 2643 0, 2644 /*64*/ 0, 2645 0, 2646 0, 2647 0, 2648 0, 2649 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2650 MatGetRowMinAbs_MPIAIJ, 2651 0, 2652 MatSetColoring_MPIAIJ, 2653 #if defined(PETSC_HAVE_ADIC) 2654 MatSetValuesAdic_MPIAIJ, 2655 #else 2656 0, 2657 #endif 2658 MatSetValuesAdifor_MPIAIJ, 2659 /*75*/ 0, 2660 0, 2661 0, 2662 0, 2663 0, 2664 /*80*/ 0, 2665 0, 2666 0, 2667 /*83*/ MatLoad_MPIAIJ, 2668 0, 2669 0, 2670 0, 2671 0, 2672 0, 2673 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2674 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2675 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2676 MatPtAP_Basic, 2677 MatPtAPSymbolic_MPIAIJ, 2678 /*94*/ MatPtAPNumeric_MPIAIJ, 2679 0, 2680 0, 2681 0, 2682 0, 2683 /*99*/ 0, 2684 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2685 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2686 MatConjugate_MPIAIJ, 2687 0, 2688 /*104*/MatSetValuesRow_MPIAIJ, 2689 MatRealPart_MPIAIJ, 2690 MatImaginaryPart_MPIAIJ, 2691 0, 2692 0, 2693 /*109*/0, 2694 MatGetRedundantMatrix_MPIAIJ, 2695 MatGetRowMin_MPIAIJ, 2696 0, 2697 0, 2698 /*114*/MatGetSeqNonzerostructure_MPIAIJ, 2699 0, 2700 0, 2701 0, 2702 0, 2703 0 2704 }; 2705 2706 /* ----------------------------------------------------------------------------------------*/ 2707 2708 EXTERN_C_BEGIN 2709 #undef __FUNCT__ 2710 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2711 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2712 { 2713 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2714 PetscErrorCode ierr; 2715 2716 PetscFunctionBegin; 2717 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2718 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2719 PetscFunctionReturn(0); 2720 } 2721 EXTERN_C_END 2722 2723 EXTERN_C_BEGIN 2724 #undef __FUNCT__ 2725 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2726 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2727 { 2728 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2729 PetscErrorCode ierr; 2730 2731 PetscFunctionBegin; 2732 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2733 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2734 PetscFunctionReturn(0); 2735 } 2736 EXTERN_C_END 2737 2738 #include "petscpc.h" 2739 EXTERN_C_BEGIN 2740 #undef __FUNCT__ 2741 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2742 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2743 { 2744 Mat_MPIAIJ *b; 2745 PetscErrorCode ierr; 2746 PetscInt i; 2747 2748 PetscFunctionBegin; 2749 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2750 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2751 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2752 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2753 2754 ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr); 
2755 ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr); 2756 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 2757 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 2758 if (d_nnz) { 2759 for (i=0; i<B->rmap->n; i++) { 2760 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2761 } 2762 } 2763 if (o_nnz) { 2764 for (i=0; i<B->rmap->n; i++) { 2765 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2766 } 2767 } 2768 b = (Mat_MPIAIJ*)B->data; 2769 2770 if (!B->preallocated) { 2771 /* Explicitly create 2 MATSEQAIJ matrices. */ 2772 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2773 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2774 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2775 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2776 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2777 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2778 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2779 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2780 } 2781 2782 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2783 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2784 B->preallocated = PETSC_TRUE; 2785 PetscFunctionReturn(0); 2786 } 2787 EXTERN_C_END 2788 2789 #undef __FUNCT__ 2790 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2791 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2792 { 2793 Mat mat; 2794 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2795 PetscErrorCode ierr; 2796 2797 PetscFunctionBegin; 2798 *newmat = 0; 2799 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2800 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2801 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2802 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2803 a = (Mat_MPIAIJ*)mat->data; 2804 2805 mat->factor = matin->factor; 2806 mat->rmap->bs = matin->rmap->bs; 2807 mat->assembled = PETSC_TRUE; 2808 mat->insertmode = NOT_SET_VALUES; 2809 mat->preallocated = PETSC_TRUE; 2810 2811 a->size = oldmat->size; 2812 a->rank = oldmat->rank; 2813 a->donotstash = oldmat->donotstash; 2814 a->roworiented = oldmat->roworiented; 2815 a->rowindices = 0; 2816 a->rowvalues = 0; 2817 a->getrowactive = PETSC_FALSE; 2818 2819 ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr); 2820 ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr); 2821 2822 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr); 2823 if (oldmat->colmap) { 2824 #if defined (PETSC_USE_CTABLE) 2825 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2826 #else 2827 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2828 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2829 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2830 #endif 2831 } else a->colmap = 0; 2832 if (oldmat->garray) { 2833 PetscInt len; 2834 len = oldmat->B->cmap->n; 2835 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2836 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2837 if (len) { ierr = 
PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2838 } else a->garray = 0; 2839 2840 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2841 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2842 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2843 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2844 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2845 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2846 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2847 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2848 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2849 *newmat = mat; 2850 PetscFunctionReturn(0); 2851 } 2852 2853 #include "petscsys.h" 2854 2855 #undef __FUNCT__ 2856 #define __FUNCT__ "MatLoad_MPIAIJ" 2857 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 2858 { 2859 Mat A; 2860 PetscScalar *vals,*svals; 2861 MPI_Comm comm = ((PetscObject)viewer)->comm; 2862 MPI_Status status; 2863 PetscErrorCode ierr; 2864 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz; 2865 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2866 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2867 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2868 PetscInt cend,cstart,n,*rowners; 2869 int fd; 2870 2871 PetscFunctionBegin; 2872 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2873 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2874 if (!rank) { 2875 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2876 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2877 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2878 } 2879 2880 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2881 M = header[1]; N = header[2]; 2882 /* determine ownership of all rows */ 2883 m = M/size + ((M % size) > rank); 2884 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2885 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2886 2887 /* First process needs enough room for process with most rows */ 2888 if (!rank) { 2889 mmax = rowners[1]; 2890 for (i=2; i<size; i++) { 2891 mmax = PetscMax(mmax,rowners[i]); 2892 } 2893 } else mmax = m; 2894 2895 rowners[0] = 0; 2896 for (i=2; i<=size; i++) { 2897 rowners[i] += rowners[i-1]; 2898 } 2899 rstart = rowners[rank]; 2900 rend = rowners[rank+1]; 2901 2902 /* distribute row lengths to all processors */ 2903 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2904 if (!rank) { 2905 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2906 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2907 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2908 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2909 for (j=0; j<m; j++) { 2910 procsnz[0] += ourlens[j]; 2911 } 2912 for (i=1; i<size; i++) { 2913 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2914 /* calculate the number of nonzeros on each processor */ 2915 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2916 procsnz[i] += rowlengths[j]; 2917 } 2918 mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]); 2919 ierr = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2920 } 2921 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2922 } else { 2923 mpicnt = 
PetscMPIIntCast(m);CHKERRQ(ierr); 2924 ierr = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2925 } 2926 2927 if (!rank) { 2928 /* determine max buffer needed and allocate it */ 2929 maxnz = 0; 2930 for (i=0; i<size; i++) { 2931 maxnz = PetscMax(maxnz,procsnz[i]); 2932 } 2933 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2934 2935 /* read in my part of the matrix column indices */ 2936 nz = procsnz[0]; 2937 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2938 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2939 2940 /* read in every one elses and ship off */ 2941 for (i=1; i<size; i++) { 2942 nz = procsnz[i]; 2943 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2944 mpicnt = PetscMPIIntCast(nz); 2945 ierr = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2946 } 2947 ierr = PetscFree(cols);CHKERRQ(ierr); 2948 } else { 2949 /* determine buffer space needed for message */ 2950 nz = 0; 2951 for (i=0; i<m; i++) { 2952 nz += ourlens[i]; 2953 } 2954 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2955 2956 /* receive message of column indices*/ 2957 mpicnt = PetscMPIIntCast(nz);CHKERRQ(ierr); 2958 ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2959 ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr); 2960 if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 2961 else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);} 2962 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 2963 } 2964 2965 /* determine column ownership if matrix is not square */ 2966 if (N != M) { 2967 n = N/size + ((N % size) > rank); 2968 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2969 cstart = cend - n; 2970 } else { 2971 cstart = rstart; 2972 cend = rend; 2973 n = cend - cstart; 2974 } 2975 2976 /* loop over local rows, determining number of off diagonal entries */ 2977 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2978 jj = 0; 2979 for (i=0; i<m; i++) { 2980 for (j=0; j<ourlens[i]; j++) { 2981 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2982 jj++; 2983 } 2984 } 2985 2986 /* create our matrix */ 2987 for (i=0; i<m; i++) { 2988 ourlens[i] -= offlens[i]; 2989 } 2990 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2991 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2992 ierr = MatSetType(A,type);CHKERRQ(ierr); 2993 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2994 2995 for (i=0; i<m; i++) { 2996 ourlens[i] += offlens[i]; 2997 } 2998 2999 if (!rank) { 3000 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3001 3002 /* read in my part of the matrix numerical values */ 3003 nz = procsnz[0]; 3004 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3005 3006 /* insert into matrix */ 3007 jj = rstart; 3008 smycols = mycols; 3009 svals = vals; 3010 for (i=0; i<m; i++) { 3011 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3012 smycols += ourlens[i]; 3013 svals += ourlens[i]; 3014 jj++; 3015 } 3016 3017 /* read in other processors and ship out */ 3018 for (i=1; i<size; i++) { 3019 nz = procsnz[i]; 3020 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3021 mpicnt = PetscMPIIntCast(nz); 3022 ierr = 
MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 3023 } 3024 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3025 } else { 3026 /* receive numeric values */ 3027 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3028 3029 /* receive message of values*/ 3030 mpicnt = PetscMPIIntCast(nz); 3031 ierr = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 3032 ierr = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr); 3033 if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 3034 else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);} 3035 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 3036 3037 /* insert into matrix */ 3038 jj = rstart; 3039 smycols = mycols; 3040 svals = vals; 3041 for (i=0; i<m; i++) { 3042 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3043 smycols += ourlens[i]; 3044 svals += ourlens[i]; 3045 jj++; 3046 } 3047 } 3048 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3049 ierr = PetscFree(vals);CHKERRQ(ierr); 3050 ierr = PetscFree(mycols);CHKERRQ(ierr); 3051 ierr = PetscFree(rowners);CHKERRQ(ierr); 3052 3053 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3054 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3055 *newmat = A; 3056 PetscFunctionReturn(0); 3057 } 3058 3059 #undef __FUNCT__ 3060 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3061 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3062 { 3063 PetscErrorCode ierr; 3064 IS iscol_local; 3065 PetscInt csize; 3066 3067 PetscFunctionBegin; 3068 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3069 if (call == MAT_REUSE_MATRIX) { 3070 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3071 if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3072 } else { 3073 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3074 } 3075 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3076 if (call == MAT_INITIAL_MATRIX) { 3077 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3078 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 3079 } 3080 PetscFunctionReturn(0); 3081 } 3082 3083 #undef __FUNCT__ 3084 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3085 /* 3086 Not great since it makes two copies of the submatrix, first an SeqAIJ 3087 in local and then by concatenating the local matrices the end result. 3088 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3089 3090 Note: This requires a sequential iscol with all indices. 
3091 */ 3092 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3093 { 3094 PetscErrorCode ierr; 3095 PetscMPIInt rank,size; 3096 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3097 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3098 Mat *local,M,Mreuse; 3099 MatScalar *vwork,*aa; 3100 MPI_Comm comm = ((PetscObject)mat)->comm; 3101 Mat_SeqAIJ *aij; 3102 3103 3104 PetscFunctionBegin; 3105 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3106 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3107 3108 if (call == MAT_REUSE_MATRIX) { 3109 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3110 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3111 local = &Mreuse; 3112 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3113 } else { 3114 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3115 Mreuse = *local; 3116 ierr = PetscFree(local);CHKERRQ(ierr); 3117 } 3118 3119 /* 3120 m - number of local rows 3121 n - number of columns (same on all processors) 3122 rstart - first row in new global matrix generated 3123 */ 3124 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3125 if (call == MAT_INITIAL_MATRIX) { 3126 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3127 ii = aij->i; 3128 jj = aij->j; 3129 3130 /* 3131 Determine the number of non-zeros in the diagonal and off-diagonal 3132 portions of the matrix in order to do correct preallocation 3133 */ 3134 3135 /* first get start and end of "diagonal" columns */ 3136 if (csize == PETSC_DECIDE) { 3137 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3138 if (mglobal == n) { /* square matrix */ 3139 nlocal = m; 3140 } else { 3141 nlocal = n/size + ((n % size) > rank); 3142 } 3143 } else { 3144 nlocal = csize; 3145 } 3146 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3147 rstart = rend - nlocal; 3148 if (rank == size - 1 && rend != n) { 3149 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3150 } 3151 3152 /* next, compute all the lengths */ 3153 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3154 olens = dlens + m; 3155 for (i=0; i<m; i++) { 3156 jend = ii[i+1] - ii[i]; 3157 olen = 0; 3158 dlen = 0; 3159 for (j=0; j<jend; j++) { 3160 if (*jj < rstart || *jj >= rend) olen++; 3161 else dlen++; 3162 jj++; 3163 } 3164 olens[i] = olen; 3165 dlens[i] = dlen; 3166 } 3167 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3168 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3169 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3170 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3171 ierr = PetscFree(dlens);CHKERRQ(ierr); 3172 } else { 3173 PetscInt ml,nl; 3174 3175 M = *newmat; 3176 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3177 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3178 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3179 /* 3180 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3181 rather than the slower MatSetValues(). 
3182 */
3183 M->was_assembled = PETSC_TRUE;
3184 M->assembled = PETSC_FALSE;
3185 }
3186 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3187 aij = (Mat_SeqAIJ*)(Mreuse)->data;
3188 ii = aij->i;
3189 jj = aij->j;
3190 aa = aij->a;
3191 for (i=0; i<m; i++) {
3192 row = rstart + i;
3193 nz = ii[i+1] - ii[i];
3194 cwork = jj; jj += nz;
3195 vwork = aa; aa += nz;
3196 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3197 }
3198
3199 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3200 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3201 *newmat = M;
3202
3203 /* save submatrix used in processor for next request */
3204 if (call == MAT_INITIAL_MATRIX) {
3205 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3206 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3207 }
3208
3209 PetscFunctionReturn(0);
3210 }
3211
3212 EXTERN_C_BEGIN
3213 #undef __FUNCT__
3214 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3215 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3216 {
3217 PetscInt m,cstart,cend,j,nnz,i,d;
3218 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3219 const PetscInt *JJ;
3220 PetscScalar *values;
3221 PetscErrorCode ierr;
3222
3223 PetscFunctionBegin;
3224 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3225
3226 ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3227 ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3228 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
3229 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
3230 m = B->rmap->n;
3231 cstart = B->cmap->rstart;
3232 cend = B->cmap->rend;
3233 rstart = B->rmap->rstart;
3234
3235 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
3236 o_nnz = d_nnz + m;
3237
3238 #if defined(PETSC_USE_DEBUG)
3239 for (i=0; i<m; i++) {
3240 nnz = Ii[i+1]- Ii[i];
3241 JJ = J + Ii[i];
3242 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3243 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3244 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3245 for (j=1; j<nnz; j++) {
3246 if (JJ[j] <= JJ[j-1]) SETERRQ2(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at location %D in its column indices",i,j);
3247 }
3248 }
3249 #endif
3250
3251 for (i=0; i<m; i++) {
3252 nnz = Ii[i+1]- Ii[i];
3253 JJ = J + Ii[i];
3254 nnz_max = PetscMax(nnz_max,nnz);
3255 for (j=0; j<nnz; j++) {
3256 if (*JJ >= cstart) break;
3257 JJ++;
3258 }
3259 d = 0;
3260 for (; j<nnz; j++) {
3261 if (*JJ++ >= cend) break;
3262 d++;
3263 }
3264 d_nnz[i] = d;
3265 o_nnz[i] = nnz - d;
3266 }
3267 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3268 ierr = PetscFree(d_nnz);CHKERRQ(ierr);
3269
3270 if (v) values = (PetscScalar*)v;
3271 else {
3272 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3273 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3274 }
3275
3276 for (i=0; i<m; i++) {
3277 ii = i + rstart;
3278 nnz = Ii[i+1]- Ii[i];
3279 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3280 }
3281 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3282 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3283
3284 if (!v) {
3285 ierr = PetscFree(values);CHKERRQ(ierr);
3286 }
3287 PetscFunctionReturn(0);
3288 }
3289 EXTERN_C_END
3290
3291 #undef __FUNCT__
3292 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3293 /*@
3294 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3295 (the default parallel PETSc format).
3296
3297 Collective on MPI_Comm
3298
3299 Input Parameters:
3300 + B - the matrix
3301 . i - the indices into j for the start of each local row (starts with zero)
3302 . j - the column indices for each local row (starts with zero); these must be sorted within each row
3303 - v - optional values in the matrix
3304
3305 Level: developer
3306
3307 Notes:
3308 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3309 thus you CANNOT change the matrix entries by changing the values of v[] after you have
3310 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3311
3312 The i and j indices are zero based, and the entries of i are offsets into the local j array.
3313
3314 The format used for the sparse matrix input is equivalent to a
3315 row-major ordering, i.e., for the following matrix the expected input data is
3316 as shown:
3317
3318 1 0 0
3319 2 0 3 P0
3320 -------
3321 4 5 6 P1
3322
3323 Process0 [P0]: rows_owned=[0,1]
3324 i = {0,1,3} [size = nrow+1 = 2+1]
3325 j = {0,0,2} [size = nz = 3]
3326 v = {1,2,3} [size = nz = 3]
3327
3328 Process1 [P1]: rows_owned=[2]
3329 i = {0,3} [size = nrow+1 = 1+1]
3330 j = {0,1,2} [size = nz = 3]
3331 v = {4,5,6} [size = nz = 3]
3332
3333 The column indices for each row MUST be sorted.
3334
3335 .keywords: matrix, aij, compressed row, sparse, parallel
3336
3337 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3338 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3339 @*/
3340 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3341 {
3342 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3343
3344 PetscFunctionBegin;
3345 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3346 if (f) {
3347 ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3348 }
3349 PetscFunctionReturn(0);
3350 }
3351
3352 #undef __FUNCT__
3353 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3354 /*@C
3355 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3356 (the default parallel PETSc format). For good matrix assembly performance
3357 the user should preallocate the matrix storage by setting the parameters
3358 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3359 performance can be increased by more than a factor of 50.
3360
3361 Collective on MPI_Comm
3362
3363 Input Parameters:
3364 + B - the matrix
3365 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
3366 (same value is used for all local rows)
3367 . d_nnz - array containing the number of nonzeros in the various rows of the
3368 DIAGONAL portion of the local submatrix (possibly different for each row)
3369 or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3370 The size of this array is equal to the number of local rows, i.e 'm'. 3371 You must leave room for the diagonal entry even if it is zero. 3372 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3373 submatrix (same value is used for all local rows). 3374 - o_nnz - array containing the number of nonzeros in the various rows of the 3375 OFF-DIAGONAL portion of the local submatrix (possibly different for 3376 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3377 structure. The size of this array is equal to the number 3378 of local rows, i.e 'm'. 3379 3380 If the *_nnz parameter is given then the *_nz parameter is ignored 3381 3382 The AIJ format (also called the Yale sparse matrix format or 3383 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3384 storage. The stored row and column indices begin with zero. See the users manual for details. 3385 3386 The parallel matrix is partitioned such that the first m0 rows belong to 3387 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3388 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3389 3390 The DIAGONAL portion of the local submatrix of a processor can be defined 3391 as the submatrix which is obtained by extraction the part corresponding 3392 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 3393 first row that belongs to the processor, and r2 is the last row belonging 3394 to the this processor. This is a square mxm matrix. The remaining portion 3395 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 3396 3397 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3398 3399 You can call MatGetInfo() to get information on how effective the preallocation was; 3400 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3401 You can also run with the option -info and look for messages with the string 3402 malloc in them to see if additional memory allocation was needed. 3403 3404 Example usage: 3405 3406 Consider the following 8x8 matrix with 34 non-zero values, that is 3407 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3408 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3409 as follows: 3410 3411 .vb 3412 1 2 0 | 0 3 0 | 0 4 3413 Proc0 0 5 6 | 7 0 0 | 8 0 3414 9 0 10 | 11 0 0 | 12 0 3415 ------------------------------------- 3416 13 0 14 | 15 16 17 | 0 0 3417 Proc1 0 18 0 | 19 20 21 | 0 0 3418 0 0 0 | 22 23 0 | 24 0 3419 ------------------------------------- 3420 Proc2 25 26 27 | 0 0 28 | 29 0 3421 30 0 0 | 31 32 33 | 0 34 3422 .ve 3423 3424 This can be represented as a collection of submatrices as: 3425 3426 .vb 3427 A B C 3428 D E F 3429 G H I 3430 .ve 3431 3432 Where the submatrices A,B,C are owned by proc0, D,E,F are 3433 owned by proc1, G,H,I are owned by proc2. 3434 3435 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3436 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3437 The 'M','N' parameters are 8,8, and have the same values on all procs. 3438 3439 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3440 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3441 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3442 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3443 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3444 matrix, ans [DF] as another SeqAIJ matrix. 
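   A minimal calling sequence is sketched below (the counts 5 and 2 are placeholders,
   not recommendations; follow the call with MatSetValues() and the usual
   MatAssemblyBegin()/MatAssemblyEnd()):
.vb
     MatCreate(comm,&B);
     MatSetSizes(B,m,n,M,N);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocation(B,5,PETSC_NULL,2,PETSC_NULL);
.ve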
3445 3446 When d_nz, o_nz parameters are specified, d_nz storage elements are 3447 allocated for every row of the local diagonal submatrix, and o_nz 3448 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3449 One way to choose d_nz and o_nz is to use the max nonzerors per local 3450 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3451 In this case, the values of d_nz,o_nz are: 3452 .vb 3453 proc0 : dnz = 2, o_nz = 2 3454 proc1 : dnz = 3, o_nz = 2 3455 proc2 : dnz = 1, o_nz = 4 3456 .ve 3457 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3458 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3459 for proc3. i.e we are using 12+15+10=37 storage locations to store 3460 34 values. 3461 3462 When d_nnz, o_nnz parameters are specified, the storage is specified 3463 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3464 In the above case the values for d_nnz,o_nnz are: 3465 .vb 3466 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3467 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3468 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3469 .ve 3470 Here the space allocated is sum of all the above values i.e 34, and 3471 hence pre-allocation is perfect. 3472 3473 Level: intermediate 3474 3475 .keywords: matrix, aij, compressed row, sparse, parallel 3476 3477 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3478 MPIAIJ, MatGetInfo() 3479 @*/ 3480 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3481 { 3482 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3483 3484 PetscFunctionBegin; 3485 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3486 if (f) { 3487 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3488 } 3489 PetscFunctionReturn(0); 3490 } 3491 3492 #undef __FUNCT__ 3493 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3494 /*@ 3495 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3496 CSR format the local rows. 3497 3498 Collective on MPI_Comm 3499 3500 Input Parameters: 3501 + comm - MPI communicator 3502 . m - number of local rows (Cannot be PETSC_DECIDE) 3503 . n - This value should be the same as the local size used in creating the 3504 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3505 calculated if N is given) For square matrices n is almost always m. 3506 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3507 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3508 . i - row indices 3509 . j - column indices 3510 - a - matrix values 3511 3512 Output Parameter: 3513 . mat - the matrix 3514 3515 Level: intermediate 3516 3517 Notes: 3518 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3519 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3520 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3521 3522 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3523 3524 The format which is used for the sparse matrix input, is equivalent to a 3525 row-major ordering.. 
i.e., for the following matrix, the expected input data is
3526 as shown:
3527
3528 1 0 0
3529 2 0 3 P0
3530 -------
3531 4 5 6 P1
3532
3533 Process0 [P0]: rows_owned=[0,1]
3534 i = {0,1,3} [size = nrow+1 = 2+1]
3535 j = {0,0,2} [size = nz = 3]
3536 v = {1,2,3} [size = nz = 3]
3537
3538 Process1 [P1]: rows_owned=[2]
3539 i = {0,3} [size = nrow+1 = 1+1]
3540 j = {0,1,2} [size = nz = 3]
3541 v = {4,5,6} [size = nz = 3]
3542
3543 .keywords: matrix, aij, compressed row, sparse, parallel
3544
3545 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3546 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3547 @*/
3548 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3549 {
3550 PetscErrorCode ierr;
3551
3552 PetscFunctionBegin;
3553 if (i[0]) {
3554 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3555 }
3556 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3557 ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3558 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3559 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3560 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3561 PetscFunctionReturn(0);
3562 }
3563
3564 #undef __FUNCT__
3565 #define __FUNCT__ "MatCreateMPIAIJ"
3566 /*@C
3567 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3568 (the default parallel PETSc format). For good matrix assembly performance
3569 the user should preallocate the matrix storage by setting the parameters
3570 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3571 performance can be increased by more than a factor of 50.
3572
3573 Collective on MPI_Comm
3574
3575 Input Parameters:
3576 + comm - MPI communicator
3577 . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
3578 This value should be the same as the local size used in creating the
3579 y vector for the matrix-vector product y = Ax.
3580 . n - This value should be the same as the local size used in creating the
3581 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have it
3582 calculated if N is given) For square matrices n is almost always m.
3583 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
3584 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
3585 . d_nz - number of nonzeros per row in the DIAGONAL portion of the local submatrix
3586 (same value is used for all local rows)
3587 . d_nnz - array containing the number of nonzeros in the various rows of the
3588 DIAGONAL portion of the local submatrix (possibly different for each row)
3589 or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3590 The size of this array is equal to the number of local rows, i.e., 'm'.
3591 You must leave room for the diagonal entry even if it is zero.
3592 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of the local
3593 submatrix (same value is used for all local rows).
3594 - o_nnz - array containing the number of nonzeros in the various rows of the
3595 OFF-DIAGONAL portion of the local submatrix (possibly different for
3596 each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3597 structure. The size of this array is equal to the number
3598 of local rows, i.e., 'm'.
3599 3600 Output Parameter: 3601 . A - the matrix 3602 3603 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3604 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3605 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3606 3607 Notes: 3608 If the *_nnz parameter is given then the *_nz parameter is ignored 3609 3610 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3611 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3612 storage requirements for this matrix. 3613 3614 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3615 processor than it must be used on all processors that share the object for 3616 that argument. 3617 3618 The user MUST specify either the local or global matrix dimensions 3619 (possibly both). 3620 3621 The parallel matrix is partitioned across processors such that the 3622 first m0 rows belong to process 0, the next m1 rows belong to 3623 process 1, the next m2 rows belong to process 2 etc.. where 3624 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3625 values corresponding to [m x N] submatrix. 3626 3627 The columns are logically partitioned with the n0 columns belonging 3628 to 0th partition, the next n1 columns belonging to the next 3629 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 3630 3631 The DIAGONAL portion of the local submatrix on any given processor 3632 is the submatrix corresponding to the rows and columns m,n 3633 corresponding to the given processor. i.e diagonal matrix on 3634 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3635 etc. The remaining portion of the local submatrix [m x (N-n)] 3636 constitute the OFF-DIAGONAL portion. The example below better 3637 illustrates this concept. 3638 3639 For a square global matrix we define each processor's diagonal portion 3640 to be its local rows and the corresponding columns (a square submatrix); 3641 each processor's off-diagonal portion encompasses the remainder of the 3642 local matrix (a rectangular submatrix). 3643 3644 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3645 3646 When calling this routine with a single process communicator, a matrix of 3647 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3648 type of communicator, use the construction mechanism: 3649 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3650 3651 By default, this format uses inodes (identical nodes) when possible. 3652 We search for consecutive rows with the same nonzero structure, thereby 3653 reusing matrix information to achieve increased efficiency. 3654 3655 Options Database Keys: 3656 + -mat_no_inode - Do not use inodes 3657 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3658 - -mat_aij_oneindex - Internally use indexing starting at 1 3659 rather than 0. Note that when calling MatSetValues(), 3660 the user still MUST index entries starting at 0! 3661 3662 3663 Example usage: 3664 3665 Consider the following 8x8 matrix with 34 non-zero values, that is 3666 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3667 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown
3668 as follows:
3669
3670 .vb
3671 1 2 0 | 0 3 0 | 0 4
3672 Proc0 0 5 6 | 7 0 0 | 8 0
3673 9 0 10 | 11 0 0 | 12 0
3674 -------------------------------------
3675 13 0 14 | 15 16 17 | 0 0
3676 Proc1 0 18 0 | 19 20 21 | 0 0
3677 0 0 0 | 22 23 0 | 24 0
3678 -------------------------------------
3679 Proc2 25 26 27 | 0 0 28 | 29 0
3680 30 0 0 | 31 32 33 | 0 34
3681 .ve
3682
3683 This can be represented as a collection of submatrices as:
3684
3685 .vb
3686 A B C
3687 D E F
3688 G H I
3689 .ve
3690
3691 The submatrices A,B,C are owned by proc0, D,E,F are
3692 owned by proc1, and G,H,I are owned by proc2.
3693
3694 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3695 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3696 The 'M','N' parameters are 8,8, and have the same values on all procs.
3697
3698 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3699 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3700 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3701 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
3702 part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
3703 matrix and [DF] as another SeqAIJ matrix.
3704
3705 When the d_nz, o_nz parameters are specified, d_nz storage elements are
3706 allocated for every row of the local diagonal submatrix, and o_nz
3707 storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3708 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3709 local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3710 In this case, the values of d_nz,o_nz are:
3711 .vb
3712 proc0 : d_nz = 2, o_nz = 2
3713 proc1 : d_nz = 3, o_nz = 2
3714 proc2 : d_nz = 1, o_nz = 4
3715 .ve
3716 We are allocating m*(d_nz+o_nz) storage locations on every process. This
3717 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
3718 for proc2; i.e., we use 12+15+10=37 storage locations to store
3719 34 values.
3720
3721 When the d_nnz, o_nnz parameters are specified, the storage is specified
3722 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3723 In the above case the values for d_nnz,o_nnz are:
3724 .vb
3725 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3726 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3727 proc2: d_nnz = [1,1] and o_nnz = [4,4]
3728 .ve
3729 Here the space allocated is the sum of all the above values, i.e., 34, and
3730 hence the preallocation is exact.
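   For the 8x8 example above, a sketch of the corresponding call on each process (with m,n as
   given above and with d_nnz,o_nnz filled with the per-row counts just listed; error checking
   omitted) would be:
.vb
     MatCreateMPIAIJ(comm,m,n,8,8,0,d_nnz,0,o_nnz,&A);
.ve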
3731 3732 Level: intermediate 3733 3734 .keywords: matrix, aij, compressed row, sparse, parallel 3735 3736 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3737 MPIAIJ, MatCreateMPIAIJWithArrays() 3738 @*/ 3739 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3740 { 3741 PetscErrorCode ierr; 3742 PetscMPIInt size; 3743 3744 PetscFunctionBegin; 3745 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3746 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3747 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3748 if (size > 1) { 3749 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3750 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3751 } else { 3752 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3753 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3754 } 3755 PetscFunctionReturn(0); 3756 } 3757 3758 #undef __FUNCT__ 3759 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3760 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3761 { 3762 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3763 3764 PetscFunctionBegin; 3765 *Ad = a->A; 3766 *Ao = a->B; 3767 *colmap = a->garray; 3768 PetscFunctionReturn(0); 3769 } 3770 3771 #undef __FUNCT__ 3772 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3773 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3774 { 3775 PetscErrorCode ierr; 3776 PetscInt i; 3777 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3778 3779 PetscFunctionBegin; 3780 if (coloring->ctype == IS_COLORING_GLOBAL) { 3781 ISColoringValue *allcolors,*colors; 3782 ISColoring ocoloring; 3783 3784 /* set coloring for diagonal portion */ 3785 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3786 3787 /* set coloring for off-diagonal portion */ 3788 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3789 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3790 for (i=0; i<a->B->cmap->n; i++) { 3791 colors[i] = allcolors[a->garray[i]]; 3792 } 3793 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3794 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3795 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3796 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3797 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3798 ISColoringValue *colors; 3799 PetscInt *larray; 3800 ISColoring ocoloring; 3801 3802 /* set coloring for diagonal portion */ 3803 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3804 for (i=0; i<a->A->cmap->n; i++) { 3805 larray[i] = i + A->cmap->rstart; 3806 } 3807 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3808 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3809 for (i=0; i<a->A->cmap->n; i++) { 3810 colors[i] = coloring->colors[larray[i]]; 3811 } 3812 ierr = PetscFree(larray);CHKERRQ(ierr); 3813 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3814 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3815 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3816 3817 /* set coloring for off-diagonal portion */ 3818 ierr = 
PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3819 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3820 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3821 for (i=0; i<a->B->cmap->n; i++) { 3822 colors[i] = coloring->colors[larray[i]]; 3823 } 3824 ierr = PetscFree(larray);CHKERRQ(ierr); 3825 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3826 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3827 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3828 } else { 3829 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3830 } 3831 3832 PetscFunctionReturn(0); 3833 } 3834 3835 #if defined(PETSC_HAVE_ADIC) 3836 #undef __FUNCT__ 3837 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3838 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3839 { 3840 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3841 PetscErrorCode ierr; 3842 3843 PetscFunctionBegin; 3844 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3845 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3846 PetscFunctionReturn(0); 3847 } 3848 #endif 3849 3850 #undef __FUNCT__ 3851 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3852 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3853 { 3854 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3855 PetscErrorCode ierr; 3856 3857 PetscFunctionBegin; 3858 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3859 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3860 PetscFunctionReturn(0); 3861 } 3862 3863 #undef __FUNCT__ 3864 #define __FUNCT__ "MatMerge" 3865 /*@ 3866 MatMerge - Creates a single large PETSc matrix by concatinating sequential 3867 matrices from each processor 3868 3869 Collective on MPI_Comm 3870 3871 Input Parameters: 3872 + comm - the communicators the parallel matrix will live on 3873 . inmat - the input sequential matrices 3874 . n - number of local columns (or PETSC_DECIDE) 3875 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3876 3877 Output Parameter: 3878 . outmat - the parallel matrix generated 3879 3880 Level: advanced 3881 3882 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
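   A sketch of a typical use (assuming each process holds its own SeqAIJ piece in 'localmat',
   all with the same number of columns; note from the code below that MatMerge() calls
   MatDestroy() on the input matrix):
.vb
     Mat outmat;
     MatMerge(comm,localmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);
.ve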
3883 3884 @*/ 3885 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3886 { 3887 PetscErrorCode ierr; 3888 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3889 PetscInt *indx; 3890 PetscScalar *values; 3891 3892 PetscFunctionBegin; 3893 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3894 if (scall == MAT_INITIAL_MATRIX){ 3895 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3896 if (n == PETSC_DECIDE){ 3897 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3898 } 3899 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3900 rstart -= m; 3901 3902 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3903 for (i=0;i<m;i++) { 3904 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3905 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3906 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3907 } 3908 /* This routine will ONLY return MPIAIJ type matrix */ 3909 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3910 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3911 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3912 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3913 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3914 3915 } else if (scall == MAT_REUSE_MATRIX){ 3916 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3917 } else { 3918 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3919 } 3920 3921 for (i=0;i<m;i++) { 3922 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3923 Ii = i + rstart; 3924 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3925 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3926 } 3927 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3928 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3929 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3930 3931 PetscFunctionReturn(0); 3932 } 3933 3934 #undef __FUNCT__ 3935 #define __FUNCT__ "MatFileSplit" 3936 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3937 { 3938 PetscErrorCode ierr; 3939 PetscMPIInt rank; 3940 PetscInt m,N,i,rstart,nnz; 3941 size_t len; 3942 const PetscInt *indx; 3943 PetscViewer out; 3944 char *name; 3945 Mat B; 3946 const PetscScalar *values; 3947 3948 PetscFunctionBegin; 3949 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3950 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3951 /* Should this be the type of the diagonal block of A? 
*/ 3952 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3953 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3954 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3955 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3956 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3957 for (i=0;i<m;i++) { 3958 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3959 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3960 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3961 } 3962 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3963 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3964 3965 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 3966 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3967 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3968 sprintf(name,"%s.%d",outfile,rank); 3969 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3970 ierr = PetscFree(name); 3971 ierr = MatView(B,out);CHKERRQ(ierr); 3972 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3973 ierr = MatDestroy(B);CHKERRQ(ierr); 3974 PetscFunctionReturn(0); 3975 } 3976 3977 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3978 #undef __FUNCT__ 3979 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3980 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3981 { 3982 PetscErrorCode ierr; 3983 Mat_Merge_SeqsToMPI *merge; 3984 PetscContainer container; 3985 3986 PetscFunctionBegin; 3987 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3988 if (container) { 3989 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3990 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3991 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3992 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3993 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3994 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3995 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3996 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3997 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3998 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3999 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4000 ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr); 4001 4002 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 4003 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4004 } 4005 ierr = PetscFree(merge);CHKERRQ(ierr); 4006 4007 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4008 PetscFunctionReturn(0); 4009 } 4010 4011 #include "../src/mat/utils/freespace.h" 4012 #include "petscbt.h" 4013 4014 #undef __FUNCT__ 4015 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4016 /*@C 4017 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4018 matrices from each processor 4019 4020 Collective on MPI_Comm 4021 4022 Input Parameters: 4023 + comm - the communicators the parallel matrix will live on 4024 . seqmat - the input sequential matrices 4025 . m - number of local rows (or PETSC_DECIDE) 4026 . n - number of local columns (or PETSC_DECIDE) 4027 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4028 4029 Output Parameter: 4030 . mpimat - the parallel matrix generated 4031 4032 Level: advanced 4033 4034 Notes: 4035 The dimensions of the sequential matrix in each processor MUST be the same. 4036 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4037 destroyed when mpimat is destroyed. 
Call PetscObjectQuery() to access seqmat. 4038 @*/ 4039 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4040 { 4041 PetscErrorCode ierr; 4042 MPI_Comm comm=((PetscObject)mpimat)->comm; 4043 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4044 PetscMPIInt size,rank,taga,*len_s; 4045 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4046 PetscInt proc,m; 4047 PetscInt **buf_ri,**buf_rj; 4048 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4049 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4050 MPI_Request *s_waits,*r_waits; 4051 MPI_Status *status; 4052 MatScalar *aa=a->a; 4053 MatScalar **abuf_r,*ba_i; 4054 Mat_Merge_SeqsToMPI *merge; 4055 PetscContainer container; 4056 4057 PetscFunctionBegin; 4058 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4059 4060 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4061 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4062 4063 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4064 if (container) { 4065 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4066 } 4067 bi = merge->bi; 4068 bj = merge->bj; 4069 buf_ri = merge->buf_ri; 4070 buf_rj = merge->buf_rj; 4071 4072 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4073 owners = merge->rowmap.range; 4074 len_s = merge->len_s; 4075 4076 /* send and recv matrix values */ 4077 /*-----------------------------*/ 4078 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4079 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4080 4081 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4082 for (proc=0,k=0; proc<size; proc++){ 4083 if (!len_s[proc]) continue; 4084 i = owners[proc]; 4085 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4086 k++; 4087 } 4088 4089 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4090 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4091 ierr = PetscFree(status);CHKERRQ(ierr); 4092 4093 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4094 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4095 4096 /* insert mat values of mpimat */ 4097 /*----------------------------*/ 4098 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4099 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 4100 nextrow = buf_ri_k + merge->nrecv; 4101 nextai = nextrow + merge->nrecv; 4102 4103 for (k=0; k<merge->nrecv; k++){ 4104 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4105 nrows = *(buf_ri_k[k]); 4106 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4107 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4108 } 4109 4110 /* set values of ba */ 4111 m = merge->rowmap.n; 4112 for (i=0; i<m; i++) { 4113 arow = owners[rank] + i; 4114 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4115 bnzi = bi[i+1] - bi[i]; 4116 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4117 4118 /* add local non-zero vals of this proc's seqmat into ba */ 4119 anzi = ai[arow+1] - ai[arow]; 4120 aj = a->j + ai[arow]; 4121 aa = a->a + ai[arow]; 4122 nextaj = 0; 4123 for (j=0; nextaj<anzi; j++){ 4124 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4125 ba_i[j] += aa[nextaj++]; 4126 } 4127 } 4128 4129 /* add 
received vals into ba */ 4130 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4131 /* i-th row */ 4132 if (i == *nextrow[k]) { 4133 anzi = *(nextai[k]+1) - *nextai[k]; 4134 aj = buf_rj[k] + *(nextai[k]); 4135 aa = abuf_r[k] + *(nextai[k]); 4136 nextaj = 0; 4137 for (j=0; nextaj<anzi; j++){ 4138 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4139 ba_i[j] += aa[nextaj++]; 4140 } 4141 } 4142 nextrow[k]++; nextai[k]++; 4143 } 4144 } 4145 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4146 } 4147 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4148 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4149 4150 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4151 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4152 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 4153 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4154 PetscFunctionReturn(0); 4155 } 4156 4157 #undef __FUNCT__ 4158 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4159 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4160 { 4161 PetscErrorCode ierr; 4162 Mat B_mpi; 4163 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4164 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4165 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4166 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4167 PetscInt len,proc,*dnz,*onz; 4168 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4169 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4170 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4171 MPI_Status *status; 4172 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4173 PetscBT lnkbt; 4174 Mat_Merge_SeqsToMPI *merge; 4175 PetscContainer container; 4176 4177 PetscFunctionBegin; 4178 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4179 4180 /* make sure it is a PETSc comm */ 4181 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4182 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4183 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4184 4185 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4186 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4187 4188 /* determine row ownership */ 4189 /*---------------------------------------------------------*/ 4190 ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 4191 merge->rowmap.n = m; 4192 merge->rowmap.N = M; 4193 merge->rowmap.bs = 1; 4194 ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr); 4195 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4196 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4197 4198 m = merge->rowmap.n; 4199 M = merge->rowmap.N; 4200 owners = merge->rowmap.range; 4201 4202 /* determine the number of messages to send, their lengths */ 4203 /*---------------------------------------------------------*/ 4204 len_s = merge->len_s; 4205 4206 len = 0; /* length of buf_si[] */ 4207 merge->nsend = 0; 4208 for (proc=0; proc<size; proc++){ 4209 len_si[proc] = 0; 4210 if (proc == rank){ 4211 len_s[proc] = 0; 4212 } else { 4213 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4214 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4215 } 4216 if (len_s[proc]) { 4217 merge->nsend++; 4218 nrows = 0; 4219 for (i=owners[proc]; i<owners[proc+1]; i++){ 4220 if (ai[i+1] > ai[i]) nrows++; 4221 } 4222 len_si[proc] = 2*(nrows+1); 4223 len += 
len_si[proc]; 4224 } 4225 } 4226 4227 /* determine the number and length of messages to receive for ij-structure */ 4228 /*-------------------------------------------------------------------------*/ 4229 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4230 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4231 4232 /* post the Irecv of j-structure */ 4233 /*-------------------------------*/ 4234 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4235 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4236 4237 /* post the Isend of j-structure */ 4238 /*--------------------------------*/ 4239 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 4240 sj_waits = si_waits + merge->nsend; 4241 4242 for (proc=0, k=0; proc<size; proc++){ 4243 if (!len_s[proc]) continue; 4244 i = owners[proc]; 4245 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4246 k++; 4247 } 4248 4249 /* receives and sends of j-structure are complete */ 4250 /*------------------------------------------------*/ 4251 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4252 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4253 4254 /* send and recv i-structure */ 4255 /*---------------------------*/ 4256 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4257 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4258 4259 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4260 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4261 for (proc=0,k=0; proc<size; proc++){ 4262 if (!len_s[proc]) continue; 4263 /* form outgoing message for i-structure: 4264 buf_si[0]: nrows to be sent 4265 [1:nrows]: row index (global) 4266 [nrows+1:2*nrows+1]: i-structure index 4267 */ 4268 /*-------------------------------------------*/ 4269 nrows = len_si[proc]/2 - 1; 4270 buf_si_i = buf_si + nrows+1; 4271 buf_si[0] = nrows; 4272 buf_si_i[0] = 0; 4273 nrows = 0; 4274 for (i=owners[proc]; i<owners[proc+1]; i++){ 4275 anzi = ai[i+1] - ai[i]; 4276 if (anzi) { 4277 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4278 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4279 nrows++; 4280 } 4281 } 4282 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4283 k++; 4284 buf_si += len_si[proc]; 4285 } 4286 4287 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4288 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4289 4290 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4291 for (i=0; i<merge->nrecv; i++){ 4292 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4293 } 4294 4295 ierr = PetscFree(len_si);CHKERRQ(ierr); 4296 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4297 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4298 ierr = PetscFree(si_waits);CHKERRQ(ierr); 4299 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4300 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4301 ierr = PetscFree(status);CHKERRQ(ierr); 4302 4303 /* compute a local seq matrix in each processor */ 4304 /*----------------------------------------------*/ 4305 /* allocate bi array and 
free space for accumulating nonzero column info */ 4306 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4307 bi[0] = 0; 4308 4309 /* create and initialize a linked list */ 4310 nlnk = N+1; 4311 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4312 4313 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4314 len = 0; 4315 len = ai[owners[rank+1]] - ai[owners[rank]]; 4316 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4317 current_space = free_space; 4318 4319 /* determine symbolic info for each local row */ 4320 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 4321 nextrow = buf_ri_k + merge->nrecv; 4322 nextai = nextrow + merge->nrecv; 4323 for (k=0; k<merge->nrecv; k++){ 4324 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4325 nrows = *buf_ri_k[k]; 4326 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4327 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4328 } 4329 4330 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4331 len = 0; 4332 for (i=0;i<m;i++) { 4333 bnzi = 0; 4334 /* add local non-zero cols of this proc's seqmat into lnk */ 4335 arow = owners[rank] + i; 4336 anzi = ai[arow+1] - ai[arow]; 4337 aj = a->j + ai[arow]; 4338 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4339 bnzi += nlnk; 4340 /* add received col data into lnk */ 4341 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4342 if (i == *nextrow[k]) { /* i-th row */ 4343 anzi = *(nextai[k]+1) - *nextai[k]; 4344 aj = buf_rj[k] + *nextai[k]; 4345 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4346 bnzi += nlnk; 4347 nextrow[k]++; nextai[k]++; 4348 } 4349 } 4350 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4351 4352 /* if free space is not available, make more free space */ 4353 if (current_space->local_remaining<bnzi) { 4354 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4355 nspacedouble++; 4356 } 4357 /* copy data into free space, then initialize lnk */ 4358 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4359 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4360 4361 current_space->array += bnzi; 4362 current_space->local_used += bnzi; 4363 current_space->local_remaining -= bnzi; 4364 4365 bi[i+1] = bi[i] + bnzi; 4366 } 4367 4368 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 4369 4370 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4371 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4372 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4373 4374 /* create symbolic parallel matrix B_mpi */ 4375 /*---------------------------------------*/ 4376 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4377 if (n==PETSC_DECIDE) { 4378 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4379 } else { 4380 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4381 } 4382 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4383 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4384 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4385 4386 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4387 B_mpi->assembled = PETSC_FALSE; 4388 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4389 merge->bi = bi; 4390 merge->bj = bj; 4391 merge->buf_ri = buf_ri; 4392 merge->buf_rj = 
buf_rj; 4393 merge->coi = PETSC_NULL; 4394 merge->coj = PETSC_NULL; 4395 merge->owners_co = PETSC_NULL; 4396 4397 /* attach the supporting struct to B_mpi for reuse */ 4398 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4399 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4400 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4401 *mpimat = B_mpi; 4402 4403 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4404 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4405 PetscFunctionReturn(0); 4406 } 4407 4408 #undef __FUNCT__ 4409 #define __FUNCT__ "MatMerge_SeqsToMPI" 4410 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4411 { 4412 PetscErrorCode ierr; 4413 4414 PetscFunctionBegin; 4415 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4416 if (scall == MAT_INITIAL_MATRIX){ 4417 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4418 } 4419 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4420 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4421 PetscFunctionReturn(0); 4422 } 4423 4424 #undef __FUNCT__ 4425 #define __FUNCT__ "MatGetLocalMat" 4426 /*@ 4427 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4428 4429 Not Collective 4430 4431 Input Parameters: 4432 + A - the matrix 4433 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4434 4435 Output Parameter: 4436 . A_loc - the local sequential matrix generated 4437 4438 Level: developer 4439 4440 @*/ 4441 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4442 { 4443 PetscErrorCode ierr; 4444 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4445 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4446 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4447 MatScalar *aa=a->a,*ba=b->a,*cam; 4448 PetscScalar *ca; 4449 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4450 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4451 4452 PetscFunctionBegin; 4453 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4454 if (scall == MAT_INITIAL_MATRIX){ 4455 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4456 ci[0] = 0; 4457 for (i=0; i<am; i++){ 4458 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4459 } 4460 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4461 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4462 k = 0; 4463 for (i=0; i<am; i++) { 4464 ncols_o = bi[i+1] - bi[i]; 4465 ncols_d = ai[i+1] - ai[i]; 4466 /* off-diagonal portion of A */ 4467 for (jo=0; jo<ncols_o; jo++) { 4468 col = cmap[*bj]; 4469 if (col >= cstart) break; 4470 cj[k] = col; bj++; 4471 ca[k++] = *ba++; 4472 } 4473 /* diagonal portion of A */ 4474 for (j=0; j<ncols_d; j++) { 4475 cj[k] = cstart + *aj++; 4476 ca[k++] = *aa++; 4477 } 4478 /* off-diagonal portion of A */ 4479 for (j=jo; j<ncols_o; j++) { 4480 cj[k] = cmap[*bj++]; 4481 ca[k++] = *ba++; 4482 } 4483 } 4484 /* put together the new matrix */ 4485 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4486 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4487 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 4488 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4489 mat->free_a = PETSC_TRUE; 4490 mat->free_ij = PETSC_TRUE; 4491 mat->nonew = 0; 4492 } else if (scall == MAT_REUSE_MATRIX){ 4493 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4494 ci = mat->i; cj = mat->j; cam = mat->a; 4495 for (i=0; i<am; i++) { 4496 /* off-diagonal portion of A */ 4497 ncols_o = bi[i+1] - bi[i]; 4498 for (jo=0; jo<ncols_o; jo++) { 4499 col = cmap[*bj]; 4500 if (col >= cstart) break; 4501 *cam++ = *ba++; bj++; 4502 } 4503 /* diagonal portion of A */ 4504 ncols_d = ai[i+1] - ai[i]; 4505 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4506 /* off-diagonal portion of A */ 4507 for (j=jo; j<ncols_o; j++) { 4508 *cam++ = *ba++; bj++; 4509 } 4510 } 4511 } else { 4512 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4513 } 4514 4515 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4516 PetscFunctionReturn(0); 4517 } 4518 4519 #undef __FUNCT__ 4520 #define __FUNCT__ "MatGetLocalMatCondensed" 4521 /*@C 4522 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4523 4524 Not Collective 4525 4526 Input Parameters: 4527 + A - the matrix 4528 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4529 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4530 4531 Output Parameter: 4532 . A_loc - the local sequential matrix generated 4533 4534 Level: developer 4535 4536 @*/ 4537 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4538 { 4539 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4540 PetscErrorCode ierr; 4541 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4542 IS isrowa,iscola; 4543 Mat *aloc; 4544 4545 PetscFunctionBegin; 4546 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4547 if (!row){ 4548 start = A->rmap->rstart; end = A->rmap->rend; 4549 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4550 } else { 4551 isrowa = *row; 4552 } 4553 if (!col){ 4554 start = A->cmap->rstart; 4555 cmap = a->garray; 4556 nzA = a->A->cmap->n; 4557 nzB = a->B->cmap->n; 4558 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4559 ncols = 0; 4560 for (i=0; i<nzB; i++) { 4561 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4562 else break; 4563 } 4564 imark = i; 4565 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4566 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4567 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4568 ierr = PetscFree(idx);CHKERRQ(ierr); 4569 } else { 4570 iscola = *col; 4571 } 4572 if (scall != MAT_INITIAL_MATRIX){ 4573 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4574 aloc[0] = *A_loc; 4575 } 4576 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4577 *A_loc = aloc[0]; 4578 ierr = PetscFree(aloc);CHKERRQ(ierr); 4579 if (!row){ 4580 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4581 } 4582 if (!col){ 4583 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4584 } 4585 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4586 PetscFunctionReturn(0); 4587 } 4588 4589 #undef __FUNCT__ 4590 #define __FUNCT__ "MatGetBrowsOfAcols" 4591 /*@C 4592 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4593 4594 Collective on Mat 4595 4596 Input Parameters: 4597 + A,B - the matrices in mpiaij format 4598 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4599 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4600 4601 Output Parameter: 4602 + rowb, colb - index sets of rows and columns of B to extract 4603 . brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows 4604 - B_seq - the sequential matrix generated 4605 4606 Level: developer 4607 4608 @*/ 4609 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4610 { 4611 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4612 PetscErrorCode ierr; 4613 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4614 IS isrowb,iscolb; 4615 Mat *bseq; 4616 4617 PetscFunctionBegin; 4618 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4619 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4620 } 4621 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4622 4623 if (scall == MAT_INITIAL_MATRIX){ 4624 start = A->cmap->rstart; 4625 cmap = a->garray; 4626 nzA = a->A->cmap->n; 4627 nzB = a->B->cmap->n; 4628 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4629 ncols = 0; 4630 for (i=0; i<nzB; i++) { /* row < local row index */ 4631 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4632 else break; 4633 } 4634 imark = i; 4635 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4636 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4637 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4638 ierr = PetscFree(idx);CHKERRQ(ierr); 4639 *brstart = imark; 4640 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4641 } else { 4642 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4643 isrowb = *rowb; iscolb = *colb; 4644 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4645 bseq[0] = *B_seq; 4646 } 4647 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4648 *B_seq = bseq[0]; 4649 ierr = PetscFree(bseq);CHKERRQ(ierr); 4650 if (!rowb){ 4651 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4652 } else { 4653 *rowb = isrowb; 4654 } 4655 if (!colb){ 4656 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4657 } else { 4658 *colb = iscolb; 4659 } 4660 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4661 PetscFunctionReturn(0); 4662 } 4663 4664 #undef __FUNCT__ 4665 #define __FUNCT__ "MatGetBrowsOfAoCols" 4666 /*@C 4667 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4668 of the OFF-DIAGONAL portion of local A 4669 4670 Collective on Mat 4671 4672 Input Parameters: 4673 + A,B - the matrices in mpiaij format 4674 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4675 . 
startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4676 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4677 4678 Output Parameter: 4679 + B_oth - the sequential matrix generated 4680 4681 Level: developer 4682 4683 @*/ 4684 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth) 4685 { 4686 VecScatter_MPI_General *gen_to,*gen_from; 4687 PetscErrorCode ierr; 4688 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4689 Mat_SeqAIJ *b_oth; 4690 VecScatter ctx=a->Mvctx; 4691 MPI_Comm comm=((PetscObject)ctx)->comm; 4692 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4693 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4694 PetscScalar *rvalues,*svalues; 4695 MatScalar *b_otha,*bufa,*bufA; 4696 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4697 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4698 MPI_Status *sstatus,rstatus; 4699 PetscMPIInt jj; 4700 PetscInt *cols,sbs,rbs; 4701 PetscScalar *vals; 4702 4703 PetscFunctionBegin; 4704 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4705 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4706 } 4707 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4708 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4709 4710 gen_to = (VecScatter_MPI_General*)ctx->todata; 4711 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4712 rvalues = gen_from->values; /* holds the length of receiving row */ 4713 svalues = gen_to->values; /* holds the length of sending row */ 4714 nrecvs = gen_from->n; 4715 nsends = gen_to->n; 4716 4717 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4718 srow = gen_to->indices; /* local row index to be sent */ 4719 sstarts = gen_to->starts; 4720 sprocs = gen_to->procs; 4721 sstatus = gen_to->sstatus; 4722 sbs = gen_to->bs; 4723 rstarts = gen_from->starts; 4724 rprocs = gen_from->procs; 4725 rbs = gen_from->bs; 4726 4727 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4728 if (scall == MAT_INITIAL_MATRIX){ 4729 /* i-array */ 4730 /*---------*/ 4731 /* post receives */ 4732 for (i=0; i<nrecvs; i++){ 4733 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4734 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4735 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4736 } 4737 4738 /* pack the outgoing message */ 4739 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 4740 rstartsj = sstartsj + nsends +1; 4741 sstartsj[0] = 0; rstartsj[0] = 0; 4742 len = 0; /* total length of j or a array to be sent */ 4743 k = 0; 4744 for (i=0; i<nsends; i++){ 4745 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4746 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4747 for (j=0; j<nrows; j++) { 4748 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4749 for (l=0; l<sbs; l++){ 4750 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4751 rowlen[j*sbs+l] = ncols; 4752 len += ncols; 4753 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4754 } 4755 k++; 4756 } 4757 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4758 
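      /* running total of row lengths: when this loop finishes, sstartsj[] gives, for each outgoing
         message, the offset of its first entry in the bufj/bufa send buffers allocated below */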
      sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
    b_othi[0] = 0;
    len = 0; /* total length of j or a array to be received */
    k = 0;
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++){
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++){
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++){
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++){
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX){
    sstartsj = *startsj;
    rstartsj = sstartsj + nsends +1;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++){
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++){
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++){
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++){
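          /* append this row's values to the outgoing a-array buffer; bufA walks through bufa
             in the same order as the column indices packed into bufj above */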
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj || !bufa_ptr){
      ierr = PetscFree(sstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);  /* free the value send buffer itself, not the caller's pointer argument */
    } else {
      *startsj  = sstartsj;
      *bufa_ptr = bufa;
    }
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetCommunicationStructs"
/*@C
    MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

    Not Collective

    Input Parameter:
.   A - The matrix in mpiaij format

    Output Parameters:
+   lvec - The local vector holding off-process values from the argument to a matrix-vector product
.
colmap - A map from global column index to local index into lvec 4902 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4903 4904 Level: developer 4905 4906 @*/ 4907 #if defined (PETSC_USE_CTABLE) 4908 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4909 #else 4910 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4911 #endif 4912 { 4913 Mat_MPIAIJ *a; 4914 4915 PetscFunctionBegin; 4916 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 4917 PetscValidPointer(lvec, 2) 4918 PetscValidPointer(colmap, 3) 4919 PetscValidPointer(multScatter, 4) 4920 a = (Mat_MPIAIJ *) A->data; 4921 if (lvec) *lvec = a->lvec; 4922 if (colmap) *colmap = a->colmap; 4923 if (multScatter) *multScatter = a->Mvctx; 4924 PetscFunctionReturn(0); 4925 } 4926 4927 EXTERN_C_BEGIN 4928 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*); 4929 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*); 4930 EXTERN_C_END 4931 4932 #include "../src/mat/impls/dense/mpi/mpidense.h" 4933 4934 #undef __FUNCT__ 4935 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4936 /* 4937 Computes (B'*A')' since computing B*A directly is untenable 4938 4939 n p p 4940 ( ) ( ) ( ) 4941 m ( A ) * n ( B ) = m ( C ) 4942 ( ) ( ) ( ) 4943 4944 */ 4945 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4946 { 4947 PetscErrorCode ierr; 4948 Mat At,Bt,Ct; 4949 4950 PetscFunctionBegin; 4951 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4952 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4953 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4954 ierr = MatDestroy(At);CHKERRQ(ierr); 4955 ierr = MatDestroy(Bt);CHKERRQ(ierr); 4956 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4957 ierr = MatDestroy(Ct);CHKERRQ(ierr); 4958 PetscFunctionReturn(0); 4959 } 4960 4961 #undef __FUNCT__ 4962 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4963 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4964 { 4965 PetscErrorCode ierr; 4966 PetscInt m=A->rmap->n,n=B->cmap->n; 4967 Mat Cmat; 4968 4969 PetscFunctionBegin; 4970 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 4971 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 4972 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4973 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 4974 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 4975 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4976 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4977 *C = Cmat; 4978 PetscFunctionReturn(0); 4979 } 4980 4981 /* ----------------------------------------------------------------*/ 4982 #undef __FUNCT__ 4983 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 4984 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 4985 { 4986 PetscErrorCode ierr; 4987 4988 PetscFunctionBegin; 4989 if (scall == MAT_INITIAL_MATRIX){ 4990 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 4991 } 4992 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 4993 PetscFunctionReturn(0); 4994 } 4995 4996 EXTERN_C_BEGIN 4997 #if defined(PETSC_HAVE_MUMPS) 4998 extern 
PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*); 4999 #endif 5000 #if defined(PETSC_HAVE_PASTIX) 5001 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5002 #endif 5003 #if defined(PETSC_HAVE_SUPERLU_DIST) 5004 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5005 #endif 5006 #if defined(PETSC_HAVE_SPOOLES) 5007 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5008 #endif 5009 EXTERN_C_END 5010 5011 /*MC 5012 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5013 5014 Options Database Keys: 5015 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5016 5017 Level: beginner 5018 5019 .seealso: MatCreateMPIAIJ() 5020 M*/ 5021 5022 EXTERN_C_BEGIN 5023 #undef __FUNCT__ 5024 #define __FUNCT__ "MatCreate_MPIAIJ" 5025 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 5026 { 5027 Mat_MPIAIJ *b; 5028 PetscErrorCode ierr; 5029 PetscMPIInt size; 5030 5031 PetscFunctionBegin; 5032 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5033 5034 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5035 B->data = (void*)b; 5036 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5037 B->rmap->bs = 1; 5038 B->assembled = PETSC_FALSE; 5039 B->mapping = 0; 5040 5041 B->insertmode = NOT_SET_VALUES; 5042 b->size = size; 5043 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5044 5045 /* build cache for off array entries formed */ 5046 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5047 b->donotstash = PETSC_FALSE; 5048 b->colmap = 0; 5049 b->garray = 0; 5050 b->roworiented = PETSC_TRUE; 5051 5052 /* stuff used for matrix vector multiply */ 5053 b->lvec = PETSC_NULL; 5054 b->Mvctx = PETSC_NULL; 5055 5056 /* stuff for MatGetRow() */ 5057 b->rowindices = 0; 5058 b->rowvalues = 0; 5059 b->getrowactive = PETSC_FALSE; 5060 5061 #if defined(PETSC_HAVE_SPOOLES) 5062 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_spooles_C", 5063 "MatGetFactor_mpiaij_spooles", 5064 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5065 #endif 5066 #if defined(PETSC_HAVE_MUMPS) 5067 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_mumps_C", 5068 "MatGetFactor_mpiaij_mumps", 5069 MatGetFactor_mpiaij_mumps);CHKERRQ(ierr); 5070 #endif 5071 #if defined(PETSC_HAVE_PASTIX) 5072 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_pastix_C", 5073 "MatGetFactor_mpiaij_pastix", 5074 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5075 #endif 5076 #if defined(PETSC_HAVE_SUPERLU_DIST) 5077 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_superlu_dist_C", 5078 "MatGetFactor_mpiaij_superlu_dist", 5079 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5080 #endif 5081 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5082 "MatStoreValues_MPIAIJ", 5083 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5084 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5085 "MatRetrieveValues_MPIAIJ", 5086 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5087 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5088 "MatGetDiagonalBlock_MPIAIJ", 5089 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5090 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5091 "MatIsTranspose_MPIAIJ", 5092 
MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5093 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5094 "MatMPIAIJSetPreallocation_MPIAIJ", 5095 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5096 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5097 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5098 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5099 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5100 "MatDiagonalScaleLocal_MPIAIJ", 5101 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5102 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C", 5103 "MatConvert_MPIAIJ_MPICSRPERM", 5104 MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr); 5105 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C", 5106 "MatConvert_MPIAIJ_MPICRL", 5107 MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr); 5108 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5109 "MatMatMult_MPIDense_MPIAIJ", 5110 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5111 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5112 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5113 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5114 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5115 "MatMatMultNumeric_MPIDense_MPIAIJ", 5116 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5117 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5118 PetscFunctionReturn(0); 5119 } 5120 EXTERN_C_END 5121 5122 #undef __FUNCT__ 5123 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5124 /*@ 5125 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5126 and "off-diagonal" part of the matrix in CSR format. 5127 5128 Collective on MPI_Comm 5129 5130 Input Parameters: 5131 + comm - MPI communicator 5132 . m - number of local rows (Cannot be PETSC_DECIDE) 5133 . n - This value should be the same as the local size used in creating the 5134 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5135 calculated if N is given) For square matrices n is almost always m. 5136 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5137 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5138 . i - row indices for "diagonal" portion of matrix 5139 . j - column indices 5140 . a - matrix values 5141 . oi - row indices for "off-diagonal" portion of matrix 5142 . oj - column indices 5143 - oa - matrix values 5144 5145 Output Parameter: 5146 . mat - the matrix 5147 5148 Level: advanced 5149 5150 Notes: 5151 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 
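
    As an illustration only (a sketch with made-up values, not taken from a PETSc example), the 2-by-2
    identity on a single process, whose "off-diagonal" part is empty, could be supplied as
.vb
      PetscInt    i[]  = {0,1,2},  j[]  = {0,1};
      PetscScalar a[]  = {1.0,1.0};
      PetscInt    oi[] = {0,0,0},  oj[] = {0};   /* oj[],oa[] are never read because oi[] is all zeros */
      PetscScalar oa[] = {0.0};
      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,
                                            i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);
.ve
    where mat is the output Mat; since the arrays are not copied, they must remain valid for the life of the matrix.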
5152 5153 The i and j indices are 0 based 5154 5155 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5156 5157 5158 .keywords: matrix, aij, compressed row, sparse, parallel 5159 5160 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5161 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5162 @*/ 5163 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5164 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5165 { 5166 PetscErrorCode ierr; 5167 Mat_MPIAIJ *maij; 5168 5169 PetscFunctionBegin; 5170 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5171 if (i[0]) { 5172 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5173 } 5174 if (oi[0]) { 5175 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5176 } 5177 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5178 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5179 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5180 maij = (Mat_MPIAIJ*) (*mat)->data; 5181 maij->donotstash = PETSC_TRUE; 5182 (*mat)->preallocated = PETSC_TRUE; 5183 5184 ierr = PetscMapSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5185 ierr = PetscMapSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5186 ierr = PetscMapSetUp((*mat)->rmap);CHKERRQ(ierr); 5187 ierr = PetscMapSetUp((*mat)->cmap);CHKERRQ(ierr); 5188 5189 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5190 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5191 5192 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5193 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5194 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5195 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5196 5197 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5198 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5199 PetscFunctionReturn(0); 5200 } 5201 5202 /* 5203 Special version for direct calls from Fortran 5204 */ 5205 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5206 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5207 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5208 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5209 #endif 5210 5211 /* Change these macros so can be used in void function */ 5212 #undef CHKERRQ 5213 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5214 #undef SETERRQ2 5215 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5216 #undef SETERRQ 5217 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5218 5219 EXTERN_C_BEGIN 5220 #undef __FUNCT__ 5221 #define __FUNCT__ "matsetvaluesmpiaij_" 5222 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5223 { 5224 Mat mat = *mmat; 5225 PetscInt m = *mm, n = *mn; 5226 InsertMode addv = *maddv; 5227 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5228 PetscScalar value; 5229 PetscErrorCode ierr; 5230 5231 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5232 if (mat->insertmode == NOT_SET_VALUES) { 5233 mat->insertmode = addv; 5234 } 5235 #if defined(PETSC_USE_DEBUG) 5236 else if (mat->insertmode != addv) { 5237 
SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5238 } 5239 #endif 5240 { 5241 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5242 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5243 PetscTruth roworiented = aij->roworiented; 5244 5245 /* Some Variables required in the macro */ 5246 Mat A = aij->A; 5247 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5248 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5249 MatScalar *aa = a->a; 5250 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5251 Mat B = aij->B; 5252 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5253 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5254 MatScalar *ba = b->a; 5255 5256 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5257 PetscInt nonew = a->nonew; 5258 MatScalar *ap1,*ap2; 5259 5260 PetscFunctionBegin; 5261 for (i=0; i<m; i++) { 5262 if (im[i] < 0) continue; 5263 #if defined(PETSC_USE_DEBUG) 5264 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5265 #endif 5266 if (im[i] >= rstart && im[i] < rend) { 5267 row = im[i] - rstart; 5268 lastcol1 = -1; 5269 rp1 = aj + ai[row]; 5270 ap1 = aa + ai[row]; 5271 rmax1 = aimax[row]; 5272 nrow1 = ailen[row]; 5273 low1 = 0; 5274 high1 = nrow1; 5275 lastcol2 = -1; 5276 rp2 = bj + bi[row]; 5277 ap2 = ba + bi[row]; 5278 rmax2 = bimax[row]; 5279 nrow2 = bilen[row]; 5280 low2 = 0; 5281 high2 = nrow2; 5282 5283 for (j=0; j<n; j++) { 5284 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5285 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5286 if (in[j] >= cstart && in[j] < cend){ 5287 col = in[j] - cstart; 5288 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5289 } else if (in[j] < 0) continue; 5290 #if defined(PETSC_USE_DEBUG) 5291 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 5292 #endif 5293 else { 5294 if (mat->was_assembled) { 5295 if (!aij->colmap) { 5296 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5297 } 5298 #if defined (PETSC_USE_CTABLE) 5299 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5300 col--; 5301 #else 5302 col = aij->colmap[in[j]] - 1; 5303 #endif 5304 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5305 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5306 col = in[j]; 5307 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5308 B = aij->B; 5309 b = (Mat_SeqAIJ*)B->data; 5310 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5311 rp2 = bj + bi[row]; 5312 ap2 = ba + bi[row]; 5313 rmax2 = bimax[row]; 5314 nrow2 = bilen[row]; 5315 low2 = 0; 5316 high2 = nrow2; 5317 bm = aij->B->rmap->n; 5318 ba = b->a; 5319 } 5320 } else col = in[j]; 5321 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5322 } 5323 } 5324 } else { 5325 if (!aij->donotstash) { 5326 if (roworiented) { 5327 if (ignorezeroentries && v[i*n] == 0.0) continue; 5328 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 5329 } else { 5330 if (ignorezeroentries && v[i] == 0.0) continue; 5331 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 5332 } 5333 } 5334 } 5335 }} 5336 PetscFunctionReturnVoid(); 5337 } 5338 EXTERN_C_END 5339 5340
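
/*
   Typical calling sequence for MatMerge_SeqsToMPI() defined earlier in this file (a sketch only;
   comm, seqmat, m, n, and C are placeholder variables, not definitions from this file):

     ierr = MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     ....  change the numerical values of seqmat without changing its nonzero pattern ....
     ierr = MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
*/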