#define PETSCMAT_DLL

#include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "petscblaslapack.h"

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
  Distributes a SeqAIJ matrix across a set of processes. Code stolen from
  MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

  Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscTruth     aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal nonzeros */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal nonzeros */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}

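/*
   Usage sketch (illustrative only, not part of this file): process 0 assembles the
   global matrix as a SeqAIJ and every process passes in the number of rows it
   should own; the call is collective and gmat is only dereferenced on process 0.
   The names gA, Adist, M, mlocal and nnz below are hypothetical.

     Mat gA = PETSC_NULL,Adist;
     if (!rank) {
       MatCreateSeqAIJ(PETSC_COMM_SELF,M,M,0,nnz,&gA);
       ... fill gA with MatSetValues() and assemble it ...
     }
     MatDistribute_MPIAIJ(comm,gA,mlocal,MAT_INITIAL_MATRIX,&Adist);
     ... later, after gA's numerical values change on process 0 ...
     MatDistribute_MPIAIJ(comm,gA,mlocal,MAT_REUSE_MATRIX,&Adist);
*/
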
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable,
  at a slightly higher hash-table cost; without it, it is not scalable (each
  process stores an order-N integer array) but access is fast.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
  if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
  lastcol1 = col;\
  while (high1-low1 > 5) { \
    t = (low1+high1)/2; \
    if (rp1[t] > col) high1 = t; \
    else              low1  = t; \
  } \
  for (_i=low1; _i<high1; _i++) { \
    if (rp1[_i] > col) break; \
    if (rp1[_i] == col) { \
      if (addv == ADD_VALUES) ap1[_i] += value; \
      else                    ap1[_i]  = value; \
      goto a_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
  if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
  N = nrow1++ - 1; a->nz++; high1++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp1[ii+1] = rp1[ii]; \
    ap1[ii+1] = ap1[ii]; \
  } \
  rp1[_i] = col; \
  ap1[_i] = value; \
  a_noinsert: ; \
  ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
  if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
  lastcol2 = col;\
  while (high2-low2 > 5) { \
    t = (low2+high2)/2; \
    if (rp2[t] > col) high2 = t; \
    else              low2  = t; \
  } \
  for (_i=low2; _i<high2; _i++) { \
    if (rp2[_i] > col) break; \
    if (rp2[_i] == col) { \
      if (addv == ADD_VALUES) ap2[_i] += value; \
      else                    ap2[_i]  = value; \
      goto b_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
  if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
  N = nrow2++ - 1; b->nz++; high2++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp2[ii+1] = rp2[ii]; \
    ap2[ii+1] = ap2[ii]; \
  } \
  rp2[_i] = col; \
  ap2[_i] = value; \
  b_noinsert: ; \
  bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode
MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 276 { 277 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 278 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 279 PetscErrorCode ierr; 280 PetscInt l,*garray = mat->garray,diag; 281 282 PetscFunctionBegin; 283 /* code only works for square matrices A */ 284 285 /* find size of row to the left of the diagonal part */ 286 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 287 row = row - diag; 288 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 289 if (garray[b->j[b->i[row]+l]] > diag) break; 290 } 291 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 292 293 /* diagonal part */ 294 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 295 296 /* right of diagonal part */ 297 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 298 PetscFunctionReturn(0); 299 } 300 301 #undef __FUNCT__ 302 #define __FUNCT__ "MatSetValues_MPIAIJ" 303 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 304 { 305 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 306 PetscScalar value; 307 PetscErrorCode ierr; 308 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 309 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 310 PetscTruth roworiented = aij->roworiented; 311 312 /* Some Variables required in the macro */ 313 Mat A = aij->A; 314 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 315 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 316 MatScalar *aa = a->a; 317 PetscTruth ignorezeroentries = a->ignorezeroentries; 318 Mat B = aij->B; 319 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 320 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 321 MatScalar *ba = b->a; 322 323 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 324 PetscInt nonew = a->nonew; 325 MatScalar *ap1,*ap2; 326 327 PetscFunctionBegin; 328 if (v) PetscValidScalarPointer(v,6); 329 for (i=0; i<m; i++) { 330 if (im[i] < 0) continue; 331 #if defined(PETSC_USE_DEBUG) 332 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 333 #endif 334 if (im[i] >= rstart && im[i] < rend) { 335 row = im[i] - rstart; 336 lastcol1 = -1; 337 rp1 = aj + ai[row]; 338 ap1 = aa + ai[row]; 339 rmax1 = aimax[row]; 340 nrow1 = ailen[row]; 341 low1 = 0; 342 high1 = nrow1; 343 lastcol2 = -1; 344 rp2 = bj + bi[row]; 345 ap2 = ba + bi[row]; 346 rmax2 = bimax[row]; 347 nrow2 = bilen[row]; 348 low2 = 0; 349 high2 = nrow2; 350 351 for (j=0; j<n; j++) { 352 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 353 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 354 if (in[j] >= cstart && in[j] < cend){ 355 col = in[j] - cstart; 356 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 357 } else if (in[j] < 0) continue; 358 #if defined(PETSC_USE_DEBUG) 359 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 360 #endif 361 else { 362 if (mat->was_assembled) { 363 if (!aij->colmap) { 364 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 365 } 366 #if defined (PETSC_USE_CTABLE) 367 ierr = 
PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 368 col--; 369 #else 370 col = aij->colmap[in[j]] - 1; 371 #endif 372 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 373 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 374 col = in[j]; 375 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 376 B = aij->B; 377 b = (Mat_SeqAIJ*)B->data; 378 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 379 rp2 = bj + bi[row]; 380 ap2 = ba + bi[row]; 381 rmax2 = bimax[row]; 382 nrow2 = bilen[row]; 383 low2 = 0; 384 high2 = nrow2; 385 bm = aij->B->rmap->n; 386 ba = b->a; 387 } 388 } else col = in[j]; 389 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 390 } 391 } 392 } else { 393 if (!aij->donotstash) { 394 if (roworiented) { 395 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 396 } else { 397 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 398 } 399 } 400 } 401 } 402 PetscFunctionReturn(0); 403 } 404 405 #undef __FUNCT__ 406 #define __FUNCT__ "MatGetValues_MPIAIJ" 407 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 408 { 409 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 410 PetscErrorCode ierr; 411 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 412 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 413 414 PetscFunctionBegin; 415 for (i=0; i<m; i++) { 416 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 417 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 418 if (idxm[i] >= rstart && idxm[i] < rend) { 419 row = idxm[i] - rstart; 420 for (j=0; j<n; j++) { 421 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 422 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 423 if (idxn[j] >= cstart && idxn[j] < cend){ 424 col = idxn[j] - cstart; 425 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 426 } else { 427 if (!aij->colmap) { 428 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 429 } 430 #if defined (PETSC_USE_CTABLE) 431 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 432 col --; 433 #else 434 col = aij->colmap[idxn[j]] - 1; 435 #endif 436 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 437 else { 438 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 439 } 440 } 441 } 442 } else { 443 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 444 } 445 } 446 PetscFunctionReturn(0); 447 } 448 449 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 450 451 #undef __FUNCT__ 452 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 453 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 454 { 455 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 456 PetscErrorCode ierr; 457 PetscInt nstash,reallocs; 458 InsertMode addv; 459 460 PetscFunctionBegin; 461 if (aij->donotstash) { 462 PetscFunctionReturn(0); 463 } 464 465 /* make sure all processors are either in INSERTMODE or ADDMODE */ 466 ierr = 
MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ *)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble.
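     The MPI_Allreduce() with MPI_PROD below computes a flag that is true only when
     every process still has was_assembled set; if any process has disassembled, the
     product is false and the processes that are still assembled disassemble as well.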
*/ 515 /* 516 if nonzero structure of submatrix B cannot change then we know that 517 no processor disassembled thus we can skip this stuff 518 */ 519 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 520 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 521 if (mat->was_assembled && !other_disassembled) { 522 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 523 } 524 } 525 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 526 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 527 } 528 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 529 ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 530 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 531 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 532 533 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 534 aij->rowvalues = 0; 535 536 /* used by MatAXPY() */ 537 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 538 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 539 540 if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;} 541 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 542 PetscFunctionReturn(0); 543 } 544 545 #undef __FUNCT__ 546 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 547 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 548 { 549 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 550 PetscErrorCode ierr; 551 552 PetscFunctionBegin; 553 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 554 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 555 PetscFunctionReturn(0); 556 } 557 558 #undef __FUNCT__ 559 #define __FUNCT__ "MatZeroRows_MPIAIJ" 560 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 561 { 562 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 563 PetscErrorCode ierr; 564 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 565 PetscInt i,*owners = A->rmap->range; 566 PetscInt *nprocs,j,idx,nsends,row; 567 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 568 PetscInt *rvalues,count,base,slen,*source; 569 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 570 MPI_Comm comm = ((PetscObject)A)->comm; 571 MPI_Request *send_waits,*recv_waits; 572 MPI_Status recv_status,*send_status; 573 #if defined(PETSC_DEBUG) 574 PetscTruth found = PETSC_FALSE; 575 #endif 576 577 PetscFunctionBegin; 578 /* first count number of contributors to each processor */ 579 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 580 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 581 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 582 j = 0; 583 for (i=0; i<N; i++) { 584 if (lastidx > (idx = rows[i])) j = 0; 585 lastidx = idx; 586 for (; j<size; j++) { 587 if (idx >= owners[j] && idx < owners[j+1]) { 588 nprocs[2*j]++; 589 nprocs[2*j+1] = 1; 590 owner[i] = j; 591 #if defined(PETSC_DEBUG) 592 found = PETSC_TRUE; 593 #endif 594 break; 595 } 596 } 597 #if defined(PETSC_DEBUG) 598 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 599 found = PETSC_FALSE; 600 #endif 601 } 602 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 603 604 /* inform other processors of number of messages and max length*/ 605 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 606 607 /* post receives: */ 608 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 609 ierr = 
PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 610 for (i=0; i<nrecvs; i++) { 611 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 612 } 613 614 /* do sends: 615 1) starts[i] gives the starting index in svalues for stuff going to 616 the ith processor 617 */ 618 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 619 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 620 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 621 starts[0] = 0; 622 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 623 for (i=0; i<N; i++) { 624 svalues[starts[owner[i]]++] = rows[i]; 625 } 626 627 starts[0] = 0; 628 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 629 count = 0; 630 for (i=0; i<size; i++) { 631 if (nprocs[2*i+1]) { 632 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 633 } 634 } 635 ierr = PetscFree(starts);CHKERRQ(ierr); 636 637 base = owners[rank]; 638 639 /* wait on receives */ 640 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 641 count = nrecvs; slen = 0; 642 while (count) { 643 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 644 /* unpack receives into our local space */ 645 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 646 source[imdex] = recv_status.MPI_SOURCE; 647 lens[imdex] = n; 648 slen += n; 649 count--; 650 } 651 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 652 653 /* move the data into the send scatter */ 654 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 655 count = 0; 656 for (i=0; i<nrecvs; i++) { 657 values = rvalues + i*nmax; 658 for (j=0; j<lens[i]; j++) { 659 lrows[count++] = values[j] - base; 660 } 661 } 662 ierr = PetscFree(rvalues);CHKERRQ(ierr); 663 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 664 ierr = PetscFree(owner);CHKERRQ(ierr); 665 ierr = PetscFree(nprocs);CHKERRQ(ierr); 666 667 /* actually zap the local rows */ 668 /* 669 Zero the required rows. If the "diagonal block" of the matrix 670 is square and the user wishes to set the diagonal we use separate 671 code so that MatSetValues() is not called for each diagonal allocating 672 new memory, thus calling lots of mallocs and slowing things down. 
673 674 */ 675 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 676 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 677 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 678 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 679 } else if (diag != 0.0) { 680 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 681 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 682 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 683 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 684 } 685 for (i = 0; i < slen; i++) { 686 row = lrows[i] + rstart; 687 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 688 } 689 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 690 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 691 } else { 692 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 693 } 694 ierr = PetscFree(lrows);CHKERRQ(ierr); 695 696 /* wait on sends */ 697 if (nsends) { 698 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 699 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 700 ierr = PetscFree(send_status);CHKERRQ(ierr); 701 } 702 ierr = PetscFree(send_waits);CHKERRQ(ierr); 703 ierr = PetscFree(svalues);CHKERRQ(ierr); 704 705 PetscFunctionReturn(0); 706 } 707 708 #undef __FUNCT__ 709 #define __FUNCT__ "MatMult_MPIAIJ" 710 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 711 { 712 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 713 PetscErrorCode ierr; 714 PetscInt nt; 715 716 PetscFunctionBegin; 717 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 718 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 719 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 720 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 721 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 722 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 723 PetscFunctionReturn(0); 724 } 725 726 #undef __FUNCT__ 727 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 728 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 729 { 730 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 731 PetscErrorCode ierr; 732 733 PetscFunctionBegin; 734 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 735 PetscFunctionReturn(0); 736 } 737 738 #undef __FUNCT__ 739 #define __FUNCT__ "MatMultAdd_MPIAIJ" 740 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 741 { 742 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 743 PetscErrorCode ierr; 744 745 PetscFunctionBegin; 746 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 747 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 748 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 749 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 750 PetscFunctionReturn(0); 751 } 752 753 #undef __FUNCT__ 754 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 755 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 756 { 757 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 758 PetscErrorCode ierr; 759 PetscTruth merged; 760 761 PetscFunctionBegin; 762 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 763 /* do nondiagonal part */ 764 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 765 if (!merged) { 766 /* send it on 
its way */ 767 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 768 /* do local part */ 769 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 770 /* receive remote parts: note this assumes the values are not actually */ 771 /* added in yy until the next line, */ 772 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 773 } else { 774 /* do local part */ 775 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 776 /* send it on its way */ 777 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 778 /* values actually were received in the Begin() but we need to call this nop */ 779 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 780 } 781 PetscFunctionReturn(0); 782 } 783 784 EXTERN_C_BEGIN 785 #undef __FUNCT__ 786 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 787 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 788 { 789 MPI_Comm comm; 790 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 791 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 792 IS Me,Notme; 793 PetscErrorCode ierr; 794 PetscInt M,N,first,last,*notme,i; 795 PetscMPIInt size; 796 797 PetscFunctionBegin; 798 799 /* Easy test: symmetric diagonal block */ 800 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 801 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 802 if (!*f) PetscFunctionReturn(0); 803 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 804 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 805 if (size == 1) PetscFunctionReturn(0); 806 807 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 808 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 809 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 810 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 811 for (i=0; i<first; i++) notme[i] = i; 812 for (i=last; i<M; i++) notme[i-last+first] = i; 813 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 814 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 815 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 816 Aoff = Aoffs[0]; 817 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 818 Boff = Boffs[0]; 819 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 820 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 821 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 822 ierr = ISDestroy(Me);CHKERRQ(ierr); 823 ierr = ISDestroy(Notme);CHKERRQ(ierr); 824 825 PetscFunctionReturn(0); 826 } 827 EXTERN_C_END 828 829 #undef __FUNCT__ 830 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 831 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 832 { 833 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 834 PetscErrorCode ierr; 835 836 PetscFunctionBegin; 837 /* do nondiagonal part */ 838 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 839 /* send it on its way */ 840 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 841 /* do local part */ 842 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 843 /* receive remote parts */ 844 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 845 PetscFunctionReturn(0); 846 } 847 848 /* 849 This only works correctly for square matrices where the subblock A->A is the 850 diagonal block 851 */ 852 #undef 
__FUNCT__ 853 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 854 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 855 { 856 PetscErrorCode ierr; 857 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 858 859 PetscFunctionBegin; 860 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 861 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 862 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 863 PetscFunctionReturn(0); 864 } 865 866 #undef __FUNCT__ 867 #define __FUNCT__ "MatScale_MPIAIJ" 868 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 869 { 870 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 871 PetscErrorCode ierr; 872 873 PetscFunctionBegin; 874 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 875 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 876 PetscFunctionReturn(0); 877 } 878 879 #undef __FUNCT__ 880 #define __FUNCT__ "MatDestroy_MPIAIJ" 881 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 882 { 883 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 884 PetscErrorCode ierr; 885 886 PetscFunctionBegin; 887 #if defined(PETSC_USE_LOG) 888 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 889 #endif 890 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 891 if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);} 892 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 893 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 894 #if defined (PETSC_USE_CTABLE) 895 if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);} 896 #else 897 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 898 #endif 899 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 900 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 901 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 902 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 903 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 904 ierr = PetscFree(aij);CHKERRQ(ierr); 905 906 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 907 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 908 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 909 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 910 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 911 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 912 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 913 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 914 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 915 PetscFunctionReturn(0); 916 } 917 918 #undef __FUNCT__ 919 #define __FUNCT__ "MatView_MPIAIJ_Binary" 920 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 921 { 922 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 923 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 924 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 925 PetscErrorCode ierr; 926 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 927 int fd; 928 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 929 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = 
mat->cmap->rstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the largest amount needed by any process */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
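  /* the values are interleaved exactly like the column indices above: for each row,
     first the off-diagonal (B) entries whose global column precedes the diagonal block,
     then the diagonal (A) entries, then the remaining B entries, so every value stays
     aligned with the column index already written */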
cnt = 0; 1012 for (i=0; i<mat->rmap->n; i++) { 1013 for (j=B->i[i]; j<B->i[i+1]; j++) { 1014 if ( garray[B->j[j]] > cstart) break; 1015 column_values[cnt++] = B->a[j]; 1016 } 1017 for (k=A->i[i]; k<A->i[i+1]; k++) { 1018 column_values[cnt++] = A->a[k]; 1019 } 1020 for (; j<B->i[i+1]; j++) { 1021 column_values[cnt++] = B->a[j]; 1022 } 1023 } 1024 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1025 1026 /* store the column values to the file */ 1027 if (!rank) { 1028 MPI_Status status; 1029 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1030 for (i=1; i<size; i++) { 1031 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1032 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1033 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1034 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1035 } 1036 } else { 1037 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1038 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1039 } 1040 ierr = PetscFree(column_values);CHKERRQ(ierr); 1041 PetscFunctionReturn(0); 1042 } 1043 1044 #undef __FUNCT__ 1045 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1046 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1047 { 1048 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1049 PetscErrorCode ierr; 1050 PetscMPIInt rank = aij->rank,size = aij->size; 1051 PetscTruth isdraw,iascii,isbinary; 1052 PetscViewer sviewer; 1053 PetscViewerFormat format; 1054 1055 PetscFunctionBegin; 1056 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1057 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1058 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1059 if (iascii) { 1060 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1061 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1062 MatInfo info; 1063 PetscTruth inodes; 1064 1065 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1066 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1067 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1068 if (!inodes) { 1069 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1070 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1071 } else { 1072 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1073 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1074 } 1075 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1076 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1077 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1078 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1079 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1080 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in 
matrix-vector product: \n");CHKERRQ(ierr); 1081 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1082 PetscFunctionReturn(0); 1083 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1084 PetscInt inodecount,inodelimit,*inodes; 1085 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1086 if (inodes) { 1087 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1088 } else { 1089 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1090 } 1091 PetscFunctionReturn(0); 1092 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1093 PetscFunctionReturn(0); 1094 } 1095 } else if (isbinary) { 1096 if (size == 1) { 1097 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1098 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1099 } else { 1100 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1101 } 1102 PetscFunctionReturn(0); 1103 } else if (isdraw) { 1104 PetscDraw draw; 1105 PetscTruth isnull; 1106 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1107 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1108 } 1109 1110 if (size == 1) { 1111 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1112 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1113 } else { 1114 /* assemble the entire matrix onto first processor. */ 1115 Mat A; 1116 Mat_SeqAIJ *Aloc; 1117 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1118 MatScalar *a; 1119 1120 if (mat->rmap->N > 1024) { 1121 PetscTruth flg = PETSC_FALSE; 1122 1123 ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1124 if (!flg) { 1125 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1126 } 1127 } 1128 1129 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1130 if (!rank) { 1131 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1132 } else { 1133 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1134 } 1135 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1136 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1137 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1138 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1139 1140 /* copy over the A part */ 1141 Aloc = (Mat_SeqAIJ*)aij->A->data; 1142 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1143 row = mat->rmap->rstart; 1144 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1145 for (i=0; i<m; i++) { 1146 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1147 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1148 } 1149 aj = Aloc->j; 1150 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1151 1152 /* copy over the B part */ 1153 Aloc = (Mat_SeqAIJ*)aij->B->data; 1154 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1155 row = mat->rmap->rstart; 1156 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1157 ct = cols; 1158 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1159 for (i=0; i<m; i++) { 1160 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1161 row++; a += ai[i+1]-ai[i]; 
cols += ai[i+1]-ai[i]; 1162 } 1163 ierr = PetscFree(ct);CHKERRQ(ierr); 1164 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1165 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1166 /* 1167 Everyone has to call to draw the matrix since the graphics waits are 1168 synchronized across all processors that share the PetscDraw object 1169 */ 1170 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1171 if (!rank) { 1172 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1173 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1174 } 1175 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1176 ierr = MatDestroy(A);CHKERRQ(ierr); 1177 } 1178 PetscFunctionReturn(0); 1179 } 1180 1181 #undef __FUNCT__ 1182 #define __FUNCT__ "MatView_MPIAIJ" 1183 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1184 { 1185 PetscErrorCode ierr; 1186 PetscTruth iascii,isdraw,issocket,isbinary; 1187 1188 PetscFunctionBegin; 1189 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1190 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1191 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1192 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1193 if (iascii || isdraw || isbinary || issocket) { 1194 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1195 } else { 1196 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1197 } 1198 PetscFunctionReturn(0); 1199 } 1200 1201 #undef __FUNCT__ 1202 #define __FUNCT__ "MatSOR_MPIAIJ" 1203 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1204 { 1205 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1206 PetscErrorCode ierr; 1207 Vec bb1 = 0; 1208 PetscTruth hasop; 1209 1210 PetscFunctionBegin; 1211 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1212 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1213 } 1214 1215 if (flag == SOR_APPLY_UPPER) { 1216 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1217 PetscFunctionReturn(0); 1218 } 1219 1220 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1221 if (flag & SOR_ZERO_INITIAL_GUESS) { 1222 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1223 its--; 1224 } 1225 1226 while (its--) { 1227 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1228 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1229 1230 /* update rhs: bb1 = bb - B*x */ 1231 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1232 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1233 1234 /* local sweep */ 1235 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1236 } 1237 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1238 if (flag & SOR_ZERO_INITIAL_GUESS) { 1239 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1240 its--; 1241 } 1242 while (its--) { 1243 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1244 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1245 1246 /* update 
rhs: bb1 = bb - B*x */ 1247 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1248 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1249 1250 /* local sweep */ 1251 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1252 } 1253 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1254 if (flag & SOR_ZERO_INITIAL_GUESS) { 1255 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1256 its--; 1257 } 1258 while (its--) { 1259 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1260 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1261 1262 /* update rhs: bb1 = bb - B*x */ 1263 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1264 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1265 1266 /* local sweep */ 1267 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1268 } 1269 } else if (flag & SOR_EISENSTAT) { 1270 Vec xx1; 1271 1272 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1273 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1274 1275 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1276 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1277 if (!mat->diag) { 1278 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1279 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1280 } 1281 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1282 if (hasop) { 1283 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1284 } else { 1285 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1286 } 1287 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1288 1289 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1290 1291 /* local sweep */ 1292 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1293 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1294 ierr = VecDestroy(xx1);CHKERRQ(ierr); 1295 } else { 1296 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1297 } 1298 1299 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 1300 PetscFunctionReturn(0); 1301 } 1302 1303 #undef __FUNCT__ 1304 #define __FUNCT__ "MatPermute_MPIAIJ" 1305 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1306 { 1307 MPI_Comm comm,pcomm; 1308 PetscInt first,local_size,nrows; 1309 const PetscInt *rows; 1310 PetscMPIInt size; 1311 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1312 PetscErrorCode ierr; 1313 1314 PetscFunctionBegin; 1315 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1316 /* make a collective version of 'rowp' */ 1317 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1318 if (pcomm==comm) { 1319 crowp = rowp; 1320 } else { 1321 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1322 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1323 ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr); 1324 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1325 } 1326 /* collect the global row permutation and invert it */ 1327 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1328 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1329 if (pcomm!=comm) { 1330 ierr = ISDestroy(crowp);CHKERRQ(ierr); 1331 } 1332 ierr = 
ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1333 /* get the local target indices */ 1334 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1335 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 1336 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1337 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr); 1338 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1339 ierr = ISDestroy(irowp);CHKERRQ(ierr); 1340 /* the column permutation is so much easier; 1341 make a local version of 'colp' and invert it */ 1342 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 1343 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 1344 if (size==1) { 1345 lcolp = colp; 1346 } else { 1347 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 1348 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 1349 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr); 1350 } 1351 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1352 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1353 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 1354 if (size>1) { 1355 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 1356 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 1357 } 1358 /* now we just get the submatrix */ 1359 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1360 /* clean up */ 1361 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 1362 ierr = ISDestroy(icolp);CHKERRQ(ierr); 1363 PetscFunctionReturn(0); 1364 } 1365 1366 #undef __FUNCT__ 1367 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1368 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1369 { 1370 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1371 Mat A = mat->A,B = mat->B; 1372 PetscErrorCode ierr; 1373 PetscReal isend[5],irecv[5]; 1374 1375 PetscFunctionBegin; 1376 info->block_size = 1.0; 1377 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1378 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1379 isend[3] = info->memory; isend[4] = info->mallocs; 1380 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1381 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1382 isend[3] += info->memory; isend[4] += info->mallocs; 1383 if (flag == MAT_LOCAL) { 1384 info->nz_used = isend[0]; 1385 info->nz_allocated = isend[1]; 1386 info->nz_unneeded = isend[2]; 1387 info->memory = isend[3]; 1388 info->mallocs = isend[4]; 1389 } else if (flag == MAT_GLOBAL_MAX) { 1390 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1391 info->nz_used = irecv[0]; 1392 info->nz_allocated = irecv[1]; 1393 info->nz_unneeded = irecv[2]; 1394 info->memory = irecv[3]; 1395 info->mallocs = irecv[4]; 1396 } else if (flag == MAT_GLOBAL_SUM) { 1397 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1398 info->nz_used = irecv[0]; 1399 info->nz_allocated = irecv[1]; 1400 info->nz_unneeded = irecv[2]; 1401 info->memory = irecv[3]; 1402 info->mallocs = irecv[4]; 1403 } 1404 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1405 info->fill_ratio_needed = 0; 1406 info->factor_mallocs = 0; 1407 1408 PetscFunctionReturn(0); 1409 } 1410 1411 #undef __FUNCT__ 1412 #define __FUNCT__ "MatSetOption_MPIAIJ" 1413 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg) 1414 { 1415 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1416 PetscErrorCode ierr; 1417 1418 PetscFunctionBegin; 1419 
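  /* options that affect the nonzero pattern, inode use, or row/column orientation are
     forwarded to both the diagonal (a->A) and off-diagonal (a->B) sequential blocks;
     the symmetry-type options only make sense for the square diagonal block so they are
     passed to a->A alone, and MAT_IGNORE_OFF_PROC_ENTRIES simply disables the stash */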
switch (op) { 1420 case MAT_NEW_NONZERO_LOCATIONS: 1421 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1422 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1423 case MAT_KEEP_NONZERO_PATTERN: 1424 case MAT_NEW_NONZERO_LOCATION_ERR: 1425 case MAT_USE_INODES: 1426 case MAT_IGNORE_ZERO_ENTRIES: 1427 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1428 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1429 break; 1430 case MAT_ROW_ORIENTED: 1431 a->roworiented = flg; 1432 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1433 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1434 break; 1435 case MAT_NEW_DIAGONALS: 1436 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1437 break; 1438 case MAT_IGNORE_OFF_PROC_ENTRIES: 1439 a->donotstash = PETSC_TRUE; 1440 break; 1441 case MAT_SYMMETRIC: 1442 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1443 break; 1444 case MAT_STRUCTURALLY_SYMMETRIC: 1445 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1446 break; 1447 case MAT_HERMITIAN: 1448 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1449 break; 1450 case MAT_SYMMETRY_ETERNAL: 1451 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1452 break; 1453 default: 1454 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1455 } 1456 PetscFunctionReturn(0); 1457 } 1458 1459 #undef __FUNCT__ 1460 #define __FUNCT__ "MatGetRow_MPIAIJ" 1461 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1462 { 1463 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1464 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1465 PetscErrorCode ierr; 1466 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1467 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1468 PetscInt *cmap,*idx_p; 1469 1470 PetscFunctionBegin; 1471 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1472 mat->getrowactive = PETSC_TRUE; 1473 1474 if (!mat->rowvalues && (idx || v)) { 1475 /* 1476 allocate enough space to hold information from the longest row. 
1477 */ 1478 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1479 PetscInt max = 1,tmp; 1480 for (i=0; i<matin->rmap->n; i++) { 1481 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1482 if (max < tmp) { max = tmp; } 1483 } 1484 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1485 } 1486 1487 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1488 lrow = row - rstart; 1489 1490 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1491 if (!v) {pvA = 0; pvB = 0;} 1492 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1493 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1494 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1495 nztot = nzA + nzB; 1496 1497 cmap = mat->garray; 1498 if (v || idx) { 1499 if (nztot) { 1500 /* Sort by increasing column numbers, assuming A and B already sorted */ 1501 PetscInt imark = -1; 1502 if (v) { 1503 *v = v_p = mat->rowvalues; 1504 for (i=0; i<nzB; i++) { 1505 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1506 else break; 1507 } 1508 imark = i; 1509 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1510 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1511 } 1512 if (idx) { 1513 *idx = idx_p = mat->rowindices; 1514 if (imark > -1) { 1515 for (i=0; i<imark; i++) { 1516 idx_p[i] = cmap[cworkB[i]]; 1517 } 1518 } else { 1519 for (i=0; i<nzB; i++) { 1520 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1521 else break; 1522 } 1523 imark = i; 1524 } 1525 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1526 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1527 } 1528 } else { 1529 if (idx) *idx = 0; 1530 if (v) *v = 0; 1531 } 1532 } 1533 *nz = nztot; 1534 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1535 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1536 PetscFunctionReturn(0); 1537 } 1538 1539 #undef __FUNCT__ 1540 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1541 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1542 { 1543 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1544 1545 PetscFunctionBegin; 1546 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1547 aij->getrowactive = PETSC_FALSE; 1548 PetscFunctionReturn(0); 1549 } 1550 1551 #undef __FUNCT__ 1552 #define __FUNCT__ "MatNorm_MPIAIJ" 1553 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1554 { 1555 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1556 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1557 PetscErrorCode ierr; 1558 PetscInt i,j,cstart = mat->cmap->rstart; 1559 PetscReal sum = 0.0; 1560 MatScalar *v; 1561 1562 PetscFunctionBegin; 1563 if (aij->size == 1) { 1564 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1565 } else { 1566 if (type == NORM_FROBENIUS) { 1567 v = amat->a; 1568 for (i=0; i<amat->nz; i++) { 1569 #if defined(PETSC_USE_COMPLEX) 1570 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1571 #else 1572 sum += (*v)*(*v); v++; 1573 #endif 1574 } 1575 v = bmat->a; 1576 for (i=0; i<bmat->nz; i++) { 1577 #if defined(PETSC_USE_COMPLEX) 1578 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1579 #else 1580 sum += (*v)*(*v); v++; 1581 #endif 1582 } 1583 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1584 *norm = sqrt(*norm); 1585 } else if (type 
== NORM_1) { /* max column norm */ 1586 PetscReal *tmp,*tmp2; 1587 PetscInt *jj,*garray = aij->garray; 1588 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1589 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1590 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1591 *norm = 0.0; 1592 v = amat->a; jj = amat->j; 1593 for (j=0; j<amat->nz; j++) { 1594 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1595 } 1596 v = bmat->a; jj = bmat->j; 1597 for (j=0; j<bmat->nz; j++) { 1598 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1599 } 1600 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1601 for (j=0; j<mat->cmap->N; j++) { 1602 if (tmp2[j] > *norm) *norm = tmp2[j]; 1603 } 1604 ierr = PetscFree(tmp);CHKERRQ(ierr); 1605 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1606 } else if (type == NORM_INFINITY) { /* max row norm */ 1607 PetscReal ntemp = 0.0; 1608 for (j=0; j<aij->A->rmap->n; j++) { 1609 v = amat->a + amat->i[j]; 1610 sum = 0.0; 1611 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1612 sum += PetscAbsScalar(*v); v++; 1613 } 1614 v = bmat->a + bmat->i[j]; 1615 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1616 sum += PetscAbsScalar(*v); v++; 1617 } 1618 if (sum > ntemp) ntemp = sum; 1619 } 1620 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1621 } else { 1622 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 1623 } 1624 } 1625 PetscFunctionReturn(0); 1626 } 1627 1628 #undef __FUNCT__ 1629 #define __FUNCT__ "MatTranspose_MPIAIJ" 1630 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1631 { 1632 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1633 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1634 PetscErrorCode ierr; 1635 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1636 PetscInt cstart=A->cmap->rstart,ncol; 1637 Mat B; 1638 MatScalar *array; 1639 1640 PetscFunctionBegin; 1641 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1642 1643 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1644 ai = Aloc->i; aj = Aloc->j; 1645 bi = Bloc->i; bj = Bloc->j; 1646 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1647 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1648 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1649 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1650 for (i=0; i<ai[ma]; i++){ 1651 d_nnz[aj[i]] ++; 1652 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1653 } 1654 1655 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1656 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1657 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1658 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1659 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1660 } else { 1661 B = *matout; 1662 } 1663 1664 /* copy over the A part */ 1665 array = Aloc->a; 1666 row = A->rmap->rstart; 1667 for (i=0; i<ma; i++) { 1668 ncol = ai[i+1]-ai[i]; 1669 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1670 row++; array += ncol; aj += ncol; 1671 } 1672 aj = Aloc->j; 1673 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1674 1675 /* copy over the B part */ 1676 ierr = 
PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1677 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1678 array = Bloc->a; 1679 row = A->rmap->rstart; 1680 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1681 cols_tmp = cols; 1682 for (i=0; i<mb; i++) { 1683 ncol = bi[i+1]-bi[i]; 1684 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1685 row++; array += ncol; cols_tmp += ncol; 1686 } 1687 ierr = PetscFree(cols);CHKERRQ(ierr); 1688 1689 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1690 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1691 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1692 *matout = B; 1693 } else { 1694 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1695 } 1696 PetscFunctionReturn(0); 1697 } 1698 1699 #undef __FUNCT__ 1700 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1701 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1702 { 1703 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1704 Mat a = aij->A,b = aij->B; 1705 PetscErrorCode ierr; 1706 PetscInt s1,s2,s3; 1707 1708 PetscFunctionBegin; 1709 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1710 if (rr) { 1711 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1712 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1713 /* Overlap communication with computation. */ 1714 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1715 } 1716 if (ll) { 1717 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1718 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1719 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1720 } 1721 /* scale the diagonal block */ 1722 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1723 1724 if (rr) { 1725 /* Do a scatter end and then right scale the off-diagonal block */ 1726 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1727 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1728 } 1729 1730 PetscFunctionReturn(0); 1731 } 1732 1733 #undef __FUNCT__ 1734 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1735 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1736 { 1737 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1738 PetscErrorCode ierr; 1739 1740 PetscFunctionBegin; 1741 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1742 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1743 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 1744 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 1745 PetscFunctionReturn(0); 1746 } 1747 #undef __FUNCT__ 1748 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1749 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1750 { 1751 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1752 PetscErrorCode ierr; 1753 1754 PetscFunctionBegin; 1755 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1756 PetscFunctionReturn(0); 1757 } 1758 1759 #undef __FUNCT__ 1760 #define __FUNCT__ "MatEqual_MPIAIJ" 1761 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1762 { 1763 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1764 Mat a,b,c,d; 1765 PetscTruth flg; 1766 PetscErrorCode ierr; 1767 1768 PetscFunctionBegin; 1769 a = matA->A; b = matA->B; 1770 c = matB->A; d = matB->B; 1771 1772 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1773 if (flg) { 1774 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1775 } 1776 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1777 
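/* flag now holds the logical AND of the per-process results: the two parallel matrices are
   reported equal only if every process found both its diagonal (A) and off-diagonal (B)
   blocks equal */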
PetscFunctionReturn(0); 1778 } 1779 1780 #undef __FUNCT__ 1781 #define __FUNCT__ "MatCopy_MPIAIJ" 1782 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1783 { 1784 PetscErrorCode ierr; 1785 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1786 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1787 1788 PetscFunctionBegin; 1789 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1790 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1791 /* because of the column compression in the off-processor part of the matrix a->B, 1792 the number of columns in a->B and b->B may be different, hence we cannot call 1793 the MatCopy() directly on the two parts. If need be, we can provide a more 1794 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1795 then copying the submatrices */ 1796 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1797 } else { 1798 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1799 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1800 } 1801 PetscFunctionReturn(0); 1802 } 1803 1804 #undef __FUNCT__ 1805 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1806 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1807 { 1808 PetscErrorCode ierr; 1809 1810 PetscFunctionBegin; 1811 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1812 PetscFunctionReturn(0); 1813 } 1814 1815 #undef __FUNCT__ 1816 #define __FUNCT__ "MatAXPY_MPIAIJ" 1817 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1818 { 1819 PetscErrorCode ierr; 1820 PetscInt i; 1821 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1822 PetscBLASInt bnz,one=1; 1823 Mat_SeqAIJ *x,*y; 1824 1825 PetscFunctionBegin; 1826 if (str == SAME_NONZERO_PATTERN) { 1827 PetscScalar alpha = a; 1828 x = (Mat_SeqAIJ *)xx->A->data; 1829 y = (Mat_SeqAIJ *)yy->A->data; 1830 bnz = PetscBLASIntCast(x->nz); 1831 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1832 x = (Mat_SeqAIJ *)xx->B->data; 1833 y = (Mat_SeqAIJ *)yy->B->data; 1834 bnz = PetscBLASIntCast(x->nz); 1835 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1836 } else if (str == SUBSET_NONZERO_PATTERN) { 1837 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1838 1839 x = (Mat_SeqAIJ *)xx->B->data; 1840 y = (Mat_SeqAIJ *)yy->B->data; 1841 if (y->xtoy && y->XtoY != xx->B) { 1842 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1843 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1844 } 1845 if (!y->xtoy) { /* get xtoy */ 1846 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1847 y->XtoY = xx->B; 1848 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1849 } 1850 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1851 } else { 1852 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1853 } 1854 PetscFunctionReturn(0); 1855 } 1856 1857 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1858 1859 #undef __FUNCT__ 1860 #define __FUNCT__ "MatConjugate_MPIAIJ" 1861 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1862 { 1863 #if defined(PETSC_USE_COMPLEX) 1864 PetscErrorCode ierr; 1865 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1866 1867 PetscFunctionBegin; 1868 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1869 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1870 #else 1871 PetscFunctionBegin; 1872 #endif 1873 PetscFunctionReturn(0); 1874 } 1875 1876 #undef __FUNCT__ 1877 #define __FUNCT__ "MatRealPart_MPIAIJ" 1878 PetscErrorCode 
MatRealPart_MPIAIJ(Mat A) 1879 { 1880 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1881 PetscErrorCode ierr; 1882 1883 PetscFunctionBegin; 1884 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1885 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1886 PetscFunctionReturn(0); 1887 } 1888 1889 #undef __FUNCT__ 1890 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1891 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1892 { 1893 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1894 PetscErrorCode ierr; 1895 1896 PetscFunctionBegin; 1897 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1898 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1899 PetscFunctionReturn(0); 1900 } 1901 1902 #ifdef PETSC_HAVE_PBGL 1903 1904 #include <boost/parallel/mpi/bsp_process_group.hpp> 1905 #include <boost/graph/distributed/ilu_default_graph.hpp> 1906 #include <boost/graph/distributed/ilu_0_block.hpp> 1907 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1908 #include <boost/graph/distributed/petsc/interface.hpp> 1909 #include <boost/multi_array.hpp> 1910 #include <boost/parallel/distributed_property_map->hpp> 1911 1912 #undef __FUNCT__ 1913 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1914 /* 1915 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1916 */ 1917 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1918 { 1919 namespace petsc = boost::distributed::petsc; 1920 1921 namespace graph_dist = boost::graph::distributed; 1922 using boost::graph::distributed::ilu_default::process_group_type; 1923 using boost::graph::ilu_permuted; 1924 1925 PetscTruth row_identity, col_identity; 1926 PetscContainer c; 1927 PetscInt m, n, M, N; 1928 PetscErrorCode ierr; 1929 1930 PetscFunctionBegin; 1931 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1932 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1933 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1934 if (!row_identity || !col_identity) { 1935 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1936 } 1937 1938 process_group_type pg; 1939 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1940 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1941 lgraph_type& level_graph = *lgraph_p; 1942 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1943 1944 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1945 ilu_permuted(level_graph); 1946 1947 /* put together the new matrix */ 1948 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 1949 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1950 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1951 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 1952 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 1953 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1954 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1955 1956 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 1957 ierr = PetscContainerSetPointer(c, lgraph_p); 1958 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 1959 PetscFunctionReturn(0); 1960 } 1961 1962 #undef __FUNCT__ 1963 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1964 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 1965 { 1966 PetscFunctionBegin; 1967 PetscFunctionReturn(0); 1968 } 1969 1970 #undef __FUNCT__ 1971 
#define __FUNCT__ "MatSolve_MPIAIJ" 1972 /* 1973 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1974 */ 1975 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1976 { 1977 namespace graph_dist = boost::graph::distributed; 1978 1979 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1980 lgraph_type* lgraph_p; 1981 PetscContainer c; 1982 PetscErrorCode ierr; 1983 1984 PetscFunctionBegin; 1985 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1986 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1987 ierr = VecCopy(b, x);CHKERRQ(ierr); 1988 1989 PetscScalar* array_x; 1990 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1991 PetscInt sx; 1992 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 1993 1994 PetscScalar* array_b; 1995 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 1996 PetscInt sb; 1997 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 1998 1999 lgraph_type& level_graph = *lgraph_p; 2000 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2001 2002 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2003 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2004 ref_x(array_x, boost::extents[num_vertices(graph)]); 2005 2006 typedef boost::iterator_property_map<array_ref_type::iterator, 2007 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2008 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2009 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2010 2011 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2012 2013 PetscFunctionReturn(0); 2014 } 2015 #endif 2016 2017 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2018 PetscInt nzlocal,nsends,nrecvs; 2019 PetscMPIInt *send_rank,*recv_rank; 2020 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2021 PetscScalar *sbuf_a,**rbuf_a; 2022 PetscErrorCode (*MatDestroy)(Mat); 2023 } Mat_Redundant; 2024 2025 #undef __FUNCT__ 2026 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2027 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2028 { 2029 PetscErrorCode ierr; 2030 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2031 PetscInt i; 2032 2033 PetscFunctionBegin; 2034 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2035 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2036 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2037 for (i=0; i<redund->nrecvs; i++){ 2038 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2039 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2040 } 2041 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2042 ierr = PetscFree(redund);CHKERRQ(ierr); 2043 PetscFunctionReturn(0); 2044 } 2045 2046 #undef __FUNCT__ 2047 #define __FUNCT__ "MatDestroy_MatRedundant" 2048 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2049 { 2050 PetscErrorCode ierr; 2051 PetscContainer container; 2052 Mat_Redundant *redund=PETSC_NULL; 2053 2054 PetscFunctionBegin; 2055 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2056 if (container) { 2057 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2058 } else { 2059 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2060 } 2061 A->ops->destroy = redund->MatDestroy; 2062 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2063 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2064 ierr = 
PetscContainerDestroy(container);CHKERRQ(ierr); 2065 PetscFunctionReturn(0); 2066 } 2067 2068 #undef __FUNCT__ 2069 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2070 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2071 { 2072 PetscMPIInt rank,size; 2073 MPI_Comm comm=((PetscObject)mat)->comm; 2074 PetscErrorCode ierr; 2075 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2076 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2077 PetscInt *rowrange=mat->rmap->range; 2078 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2079 Mat A=aij->A,B=aij->B,C=*matredundant; 2080 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2081 PetscScalar *sbuf_a; 2082 PetscInt nzlocal=a->nz+b->nz; 2083 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2084 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2085 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2086 MatScalar *aworkA,*aworkB; 2087 PetscScalar *vals; 2088 PetscMPIInt tag1,tag2,tag3,imdex; 2089 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2090 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2091 MPI_Status recv_status,*send_status; 2092 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2093 PetscInt **rbuf_j=PETSC_NULL; 2094 PetscScalar **rbuf_a=PETSC_NULL; 2095 Mat_Redundant *redund=PETSC_NULL; 2096 PetscContainer container; 2097 2098 PetscFunctionBegin; 2099 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2100 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2101 2102 if (reuse == MAT_REUSE_MATRIX) { 2103 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2104 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2105 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2106 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2107 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2108 if (container) { 2109 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2110 } else { 2111 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist"); 2112 } 2113 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2114 2115 nsends = redund->nsends; 2116 nrecvs = redund->nrecvs; 2117 send_rank = redund->send_rank; 2118 recv_rank = redund->recv_rank; 2119 sbuf_nz = redund->sbuf_nz; 2120 rbuf_nz = redund->rbuf_nz; 2121 sbuf_j = redund->sbuf_j; 2122 sbuf_a = redund->sbuf_a; 2123 rbuf_j = redund->rbuf_j; 2124 rbuf_a = redund->rbuf_a; 2125 } 2126 2127 if (reuse == MAT_INITIAL_MATRIX){ 2128 PetscMPIInt subrank,subsize; 2129 PetscInt nleftover,np_subcomm; 2130 /* get the destination processors' id send_rank, nsends and nrecvs */ 2131 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2132 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2133 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2134 np_subcomm = size/nsubcomm; 2135 nleftover = size - nsubcomm*np_subcomm; 2136 nsends = 0; nrecvs = 0; 2137 for (i=0; i<size; i++){ /* i=rank*/ 2138 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2139 send_rank[nsends] = i; nsends++; 2140 recv_rank[nrecvs++] = i; 2141 } 2142 } 2143 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2144 i = size-nleftover-1; 2145 j = 0; 2146 while (j < nsubcomm - nleftover){ 2147 send_rank[nsends++] = i; 2148 i--; j++; 2149 } 2150 } 2151 2152 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2153 for (i=0; i<nleftover; i++){ 2154 recv_rank[nrecvs++] = size-nleftover+i; 2155 } 2156 } 2157 2158 /* allocate sbuf_j, sbuf_a */ 2159 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2160 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2161 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2162 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2163 2164 /* copy mat's local entries into the buffers */ 2165 if (reuse == MAT_INITIAL_MATRIX){ 2166 rownz_max = 0; 2167 rptr = sbuf_j; 2168 cols = sbuf_j + rend-rstart + 1; 2169 vals = sbuf_a; 2170 rptr[0] = 0; 2171 for (i=0; i<rend-rstart; i++){ 2172 row = i + rstart; 2173 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2174 ncols = nzA + nzB; 2175 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2176 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2177 /* load the column indices for this row into cols */ 2178 lwrite = 0; 2179 for (l=0; l<nzB; l++) { 2180 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2181 vals[lwrite] = aworkB[l]; 2182 cols[lwrite++] = ctmp; 2183 } 2184 } 2185 for (l=0; l<nzA; l++){ 2186 vals[lwrite] = aworkA[l]; 2187 cols[lwrite++] = cstart + cworkA[l]; 2188 } 2189 for (l=0; l<nzB; l++) { 2190 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2191 vals[lwrite] = aworkB[l]; 2192 cols[lwrite++] = ctmp; 2193 } 2194 } 2195 vals += ncols; 2196 cols += ncols; 2197 rptr[i+1] = rptr[i] + ncols; 2198 if (rownz_max < ncols) rownz_max = ncols; 2199 } 2200 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2201 } else { /* only copy matrix values into sbuf_a */ 2202 rptr = sbuf_j; 2203 vals = sbuf_a; 2204 rptr[0] = 0; 2205 for (i=0; i<rend-rstart; i++){ 2206 row = i + rstart; 2207 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2208 ncols = nzA + nzB; 2209 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2210 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2211 lwrite = 0; 2212 for (l=0; l<nzB; l++) { 2213 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2214 } 2215 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2216 for 
(l=0; l<nzB; l++) { 2217 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2218 } 2219 vals += ncols; 2220 rptr[i+1] = rptr[i] + ncols; 2221 } 2222 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2223 2224 /* send nzlocal to others, and recv other's nzlocal */ 2225 /*--------------------------------------------------*/ 2226 if (reuse == MAT_INITIAL_MATRIX){ 2227 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2228 s_waits2 = s_waits3 + nsends; 2229 s_waits1 = s_waits2 + nsends; 2230 r_waits1 = s_waits1 + nsends; 2231 r_waits2 = r_waits1 + nrecvs; 2232 r_waits3 = r_waits2 + nrecvs; 2233 } else { 2234 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2235 r_waits3 = s_waits3 + nsends; 2236 } 2237 2238 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2239 if (reuse == MAT_INITIAL_MATRIX){ 2240 /* get new tags to keep the communication clean */ 2241 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2242 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2243 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2244 2245 /* post receives of other's nzlocal */ 2246 for (i=0; i<nrecvs; i++){ 2247 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2248 } 2249 /* send nzlocal to others */ 2250 for (i=0; i<nsends; i++){ 2251 sbuf_nz[i] = nzlocal; 2252 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2253 } 2254 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2255 count = nrecvs; 2256 while (count) { 2257 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2258 recv_rank[imdex] = recv_status.MPI_SOURCE; 2259 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2260 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2261 2262 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2263 rbuf_nz[imdex] += i + 2; 2264 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2265 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2266 count--; 2267 } 2268 /* wait on sends of nzlocal */ 2269 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2270 /* send mat->i,j to others, and recv from other's */ 2271 /*------------------------------------------------*/ 2272 for (i=0; i<nsends; i++){ 2273 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2274 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2275 } 2276 /* wait on receives of mat->i,j */ 2277 /*------------------------------*/ 2278 count = nrecvs; 2279 while (count) { 2280 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2281 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2282 count--; 2283 } 2284 /* wait on sends of mat->i,j */ 2285 /*---------------------------*/ 2286 if (nsends) { 2287 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2288 } 2289 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2290 2291 /* post receives, send and receive mat->a */ 2292 
/*----------------------------------------*/ 2293 for (imdex=0; imdex<nrecvs; imdex++) { 2294 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2295 } 2296 for (i=0; i<nsends; i++){ 2297 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2298 } 2299 count = nrecvs; 2300 while (count) { 2301 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2302 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2303 count--; 2304 } 2305 if (nsends) { 2306 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2307 } 2308 2309 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2310 2311 /* create redundant matrix */ 2312 /*-------------------------*/ 2313 if (reuse == MAT_INITIAL_MATRIX){ 2314 /* compute rownz_max for preallocation */ 2315 for (imdex=0; imdex<nrecvs; imdex++){ 2316 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2317 rptr = rbuf_j[imdex]; 2318 for (i=0; i<j; i++){ 2319 ncols = rptr[i+1] - rptr[i]; 2320 if (rownz_max < ncols) rownz_max = ncols; 2321 } 2322 } 2323 2324 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2325 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2326 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2327 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2328 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2329 } else { 2330 C = *matredundant; 2331 } 2332 2333 /* insert local matrix entries */ 2334 rptr = sbuf_j; 2335 cols = sbuf_j + rend-rstart + 1; 2336 vals = sbuf_a; 2337 for (i=0; i<rend-rstart; i++){ 2338 row = i + rstart; 2339 ncols = rptr[i+1] - rptr[i]; 2340 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2341 vals += ncols; 2342 cols += ncols; 2343 } 2344 /* insert received matrix entries */ 2345 for (imdex=0; imdex<nrecvs; imdex++){ 2346 rstart = rowrange[recv_rank[imdex]]; 2347 rend = rowrange[recv_rank[imdex]+1]; 2348 rptr = rbuf_j[imdex]; 2349 cols = rbuf_j[imdex] + rend-rstart + 1; 2350 vals = rbuf_a[imdex]; 2351 for (i=0; i<rend-rstart; i++){ 2352 row = i + rstart; 2353 ncols = rptr[i+1] - rptr[i]; 2354 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2355 vals += ncols; 2356 cols += ncols; 2357 } 2358 } 2359 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2360 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2361 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2362 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2363 if (reuse == MAT_INITIAL_MATRIX){ 2364 PetscContainer container; 2365 *matredundant = C; 2366 /* create a supporting struct and attach it to C for reuse */ 2367 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2368 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2369 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2370 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2371 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2372 2373 redund->nzlocal = nzlocal; 2374 redund->nsends = nsends; 2375 redund->nrecvs = nrecvs; 2376 redund->send_rank = send_rank; 2377 redund->recv_rank = recv_rank; 
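/* the buffers recorded below are stashed together with the communication pattern so that a
   later call with MAT_REUSE_MATRIX can refill the numerical values without recomputing or
   re-exchanging the nonzero structure */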
2378 redund->sbuf_nz = sbuf_nz; 2379 redund->rbuf_nz = rbuf_nz; 2380 redund->sbuf_j = sbuf_j; 2381 redund->sbuf_a = sbuf_a; 2382 redund->rbuf_j = rbuf_j; 2383 redund->rbuf_a = rbuf_a; 2384 2385 redund->MatDestroy = C->ops->destroy; 2386 C->ops->destroy = MatDestroy_MatRedundant; 2387 } 2388 PetscFunctionReturn(0); 2389 } 2390 2391 #undef __FUNCT__ 2392 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2393 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2394 { 2395 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2396 PetscErrorCode ierr; 2397 PetscInt i,*idxb = 0; 2398 PetscScalar *va,*vb; 2399 Vec vtmp; 2400 2401 PetscFunctionBegin; 2402 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2403 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2404 if (idx) { 2405 for (i=0; i<A->rmap->n; i++) { 2406 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2407 } 2408 } 2409 2410 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2411 if (idx) { 2412 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2413 } 2414 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2415 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2416 2417 for (i=0; i<A->rmap->n; i++){ 2418 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2419 va[i] = vb[i]; 2420 if (idx) idx[i] = a->garray[idxb[i]]; 2421 } 2422 } 2423 2424 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2425 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2426 if (idxb) { 2427 ierr = PetscFree(idxb);CHKERRQ(ierr); 2428 } 2429 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2430 PetscFunctionReturn(0); 2431 } 2432 2433 #undef __FUNCT__ 2434 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2435 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2436 { 2437 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2438 PetscErrorCode ierr; 2439 PetscInt i,*idxb = 0; 2440 PetscScalar *va,*vb; 2441 Vec vtmp; 2442 2443 PetscFunctionBegin; 2444 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2445 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2446 if (idx) { 2447 for (i=0; i<A->cmap->n; i++) { 2448 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2449 } 2450 } 2451 2452 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2453 if (idx) { 2454 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2455 } 2456 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2457 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2458 2459 for (i=0; i<A->rmap->n; i++){ 2460 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2461 va[i] = vb[i]; 2462 if (idx) idx[i] = a->garray[idxb[i]]; 2463 } 2464 } 2465 2466 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2467 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2468 if (idxb) { 2469 ierr = PetscFree(idxb);CHKERRQ(ierr); 2470 } 2471 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2472 PetscFunctionReturn(0); 2473 } 2474 2475 #undef __FUNCT__ 2476 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2477 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2478 { 2479 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2480 PetscInt n = A->rmap->n; 2481 PetscInt cstart = A->cmap->rstart; 2482 PetscInt *cmap = mat->garray; 2483 PetscInt *diagIdx, *offdiagIdx; 2484 Vec diagV, offdiagV; 2485 PetscScalar *a, *diagA, *offdiagA; 2486 PetscInt r; 2487 PetscErrorCode ierr; 2488 2489 PetscFunctionBegin; 2490 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2491 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2492 ierr = VecCreateSeq(((PetscObject)A)->comm, n, 
&offdiagV);CHKERRQ(ierr); 2493 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2494 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2495 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2496 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2497 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2498 for(r = 0; r < n; ++r) { 2499 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2500 a[r] = diagA[r]; 2501 idx[r] = cstart + diagIdx[r]; 2502 } else { 2503 a[r] = offdiagA[r]; 2504 idx[r] = cmap[offdiagIdx[r]]; 2505 } 2506 } 2507 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2508 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2509 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2510 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2511 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2512 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2513 PetscFunctionReturn(0); 2514 } 2515 2516 #undef __FUNCT__ 2517 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2518 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2519 { 2520 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2521 PetscInt n = A->rmap->n; 2522 PetscInt cstart = A->cmap->rstart; 2523 PetscInt *cmap = mat->garray; 2524 PetscInt *diagIdx, *offdiagIdx; 2525 Vec diagV, offdiagV; 2526 PetscScalar *a, *diagA, *offdiagA; 2527 PetscInt r; 2528 PetscErrorCode ierr; 2529 2530 PetscFunctionBegin; 2531 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2532 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2533 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2534 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2535 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2536 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2537 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2538 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2539 for(r = 0; r < n; ++r) { 2540 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2541 a[r] = diagA[r]; 2542 idx[r] = cstart + diagIdx[r]; 2543 } else { 2544 a[r] = offdiagA[r]; 2545 idx[r] = cmap[offdiagIdx[r]]; 2546 } 2547 } 2548 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2549 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2550 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2551 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2552 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2553 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2554 PetscFunctionReturn(0); 2555 } 2556 2557 #undef __FUNCT__ 2558 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2559 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat) 2560 { 2561 PetscErrorCode ierr; 2562 Mat *dummy; 2563 2564 PetscFunctionBegin; 2565 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2566 *newmat = *dummy; 2567 ierr = PetscFree(dummy);CHKERRQ(ierr); 2568 PetscFunctionReturn(0); 2569 } 2570 2571 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2572 /* -------------------------------------------------------------------*/ 2573 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2574 MatGetRow_MPIAIJ, 2575 MatRestoreRow_MPIAIJ, 2576 MatMult_MPIAIJ, 2577 /* 4*/ MatMultAdd_MPIAIJ, 2578 MatMultTranspose_MPIAIJ, 2579 MatMultTransposeAdd_MPIAIJ, 2580 #ifdef PETSC_HAVE_PBGL 2581 MatSolve_MPIAIJ, 2582 #else 2583 0, 2584 #endif 2585 0, 2586 0, 2587 /*10*/ 0, 2588 0, 2589 0, 2590 
MatSOR_MPIAIJ, 2591 MatTranspose_MPIAIJ, 2592 /*15*/ MatGetInfo_MPIAIJ, 2593 MatEqual_MPIAIJ, 2594 MatGetDiagonal_MPIAIJ, 2595 MatDiagonalScale_MPIAIJ, 2596 MatNorm_MPIAIJ, 2597 /*20*/ MatAssemblyBegin_MPIAIJ, 2598 MatAssemblyEnd_MPIAIJ, 2599 MatSetOption_MPIAIJ, 2600 MatZeroEntries_MPIAIJ, 2601 /*24*/ MatZeroRows_MPIAIJ, 2602 0, 2603 #ifdef PETSC_HAVE_PBGL 2604 0, 2605 #else 2606 0, 2607 #endif 2608 0, 2609 0, 2610 /*29*/ MatSetUpPreallocation_MPIAIJ, 2611 #ifdef PETSC_HAVE_PBGL 2612 0, 2613 #else 2614 0, 2615 #endif 2616 0, 2617 0, 2618 0, 2619 /*34*/ MatDuplicate_MPIAIJ, 2620 0, 2621 0, 2622 0, 2623 0, 2624 /*39*/ MatAXPY_MPIAIJ, 2625 MatGetSubMatrices_MPIAIJ, 2626 MatIncreaseOverlap_MPIAIJ, 2627 MatGetValues_MPIAIJ, 2628 MatCopy_MPIAIJ, 2629 /*44*/ MatGetRowMax_MPIAIJ, 2630 MatScale_MPIAIJ, 2631 0, 2632 0, 2633 0, 2634 /*49*/ MatSetBlockSize_MPIAIJ, 2635 0, 2636 0, 2637 0, 2638 0, 2639 /*54*/ MatFDColoringCreate_MPIAIJ, 2640 0, 2641 MatSetUnfactored_MPIAIJ, 2642 MatPermute_MPIAIJ, 2643 0, 2644 /*59*/ MatGetSubMatrix_MPIAIJ, 2645 MatDestroy_MPIAIJ, 2646 MatView_MPIAIJ, 2647 0, 2648 0, 2649 /*64*/ 0, 2650 0, 2651 0, 2652 0, 2653 0, 2654 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2655 MatGetRowMinAbs_MPIAIJ, 2656 0, 2657 MatSetColoring_MPIAIJ, 2658 #if defined(PETSC_HAVE_ADIC) 2659 MatSetValuesAdic_MPIAIJ, 2660 #else 2661 0, 2662 #endif 2663 MatSetValuesAdifor_MPIAIJ, 2664 /*75*/ MatFDColoringApply_AIJ, 2665 0, 2666 0, 2667 0, 2668 0, 2669 /*80*/ 0, 2670 0, 2671 0, 2672 /*83*/ 0, 2673 0, 2674 0, 2675 0, 2676 0, 2677 0, 2678 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2679 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2680 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2681 MatPtAP_Basic, 2682 MatPtAPSymbolic_MPIAIJ, 2683 /*94*/ MatPtAPNumeric_MPIAIJ, 2684 0, 2685 0, 2686 0, 2687 0, 2688 /*99*/ 0, 2689 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2690 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2691 MatConjugate_MPIAIJ, 2692 0, 2693 /*104*/MatSetValuesRow_MPIAIJ, 2694 MatRealPart_MPIAIJ, 2695 MatImaginaryPart_MPIAIJ, 2696 0, 2697 0, 2698 /*109*/0, 2699 MatGetRedundantMatrix_MPIAIJ, 2700 MatGetRowMin_MPIAIJ, 2701 0, 2702 0, 2703 /*114*/MatGetSeqNonzerostructure_MPIAIJ, 2704 0, 2705 0, 2706 0, 2707 0, 2708 /*119*/0, 2709 0, 2710 0, 2711 0, 2712 MatLoadnew_MPIAIJ 2713 }; 2714 2715 /* ----------------------------------------------------------------------------------------*/ 2716 2717 EXTERN_C_BEGIN 2718 #undef __FUNCT__ 2719 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2720 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2721 { 2722 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2723 PetscErrorCode ierr; 2724 2725 PetscFunctionBegin; 2726 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2727 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2728 PetscFunctionReturn(0); 2729 } 2730 EXTERN_C_END 2731 2732 EXTERN_C_BEGIN 2733 #undef __FUNCT__ 2734 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2735 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2736 { 2737 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2738 PetscErrorCode ierr; 2739 2740 PetscFunctionBegin; 2741 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2742 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2743 PetscFunctionReturn(0); 2744 } 2745 EXTERN_C_END 2746 2747 #include "petscpc.h" 2748 EXTERN_C_BEGIN 2749 #undef __FUNCT__ 2750 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2751 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2752 { 2753 Mat_MPIAIJ *b; 2754 PetscErrorCode ierr; 2755 
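/* d_nz/d_nnz describe the preallocation of the diagonal block b->A (columns owned by this
   process) and o_nz/o_nnz that of the off-diagonal block b->B.  An illustrative caller-side
   sketch (not part of this routine):
       ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
       ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
       ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
       ierr = MatMPIAIJSetPreallocation(A,3,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
   reserves room for 3 nonzeros per row of the diagonal block and 2 per row of the
   off-diagonal block before any values are inserted. */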
PetscInt i; 2756 2757 PetscFunctionBegin; 2758 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2759 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2760 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2761 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2762 2763 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 2764 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 2765 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2766 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2767 if (d_nnz) { 2768 for (i=0; i<B->rmap->n; i++) { 2769 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2770 } 2771 } 2772 if (o_nnz) { 2773 for (i=0; i<B->rmap->n; i++) { 2774 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2775 } 2776 } 2777 b = (Mat_MPIAIJ*)B->data; 2778 2779 if (!B->preallocated) { 2780 /* Explicitly create 2 MATSEQAIJ matrices. */ 2781 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2782 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2783 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2784 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2785 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2786 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2787 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2788 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2789 } 2790 2791 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2792 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2793 B->preallocated = PETSC_TRUE; 2794 PetscFunctionReturn(0); 2795 } 2796 EXTERN_C_END 2797 2798 #undef __FUNCT__ 2799 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2800 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2801 { 2802 Mat mat; 2803 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2804 PetscErrorCode ierr; 2805 2806 PetscFunctionBegin; 2807 *newmat = 0; 2808 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2809 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2810 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2811 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2812 a = (Mat_MPIAIJ*)mat->data; 2813 2814 mat->factortype = matin->factortype; 2815 mat->rmap->bs = matin->rmap->bs; 2816 mat->assembled = PETSC_TRUE; 2817 mat->insertmode = NOT_SET_VALUES; 2818 mat->preallocated = PETSC_TRUE; 2819 2820 a->size = oldmat->size; 2821 a->rank = oldmat->rank; 2822 a->donotstash = oldmat->donotstash; 2823 a->roworiented = oldmat->roworiented; 2824 a->rowindices = 0; 2825 a->rowvalues = 0; 2826 a->getrowactive = PETSC_FALSE; 2827 2828 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2829 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2830 2831 if (oldmat->colmap) { 2832 #if defined (PETSC_USE_CTABLE) 2833 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2834 #else 2835 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2836 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2837 ierr = 
PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2838 #endif 2839 } else a->colmap = 0; 2840 if (oldmat->garray) { 2841 PetscInt len; 2842 len = oldmat->B->cmap->n; 2843 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2844 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2845 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2846 } else a->garray = 0; 2847 2848 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2849 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2850 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2851 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2852 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2853 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2854 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2855 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2856 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2857 *newmat = mat; 2858 PetscFunctionReturn(0); 2859 } 2860 2861 #undef __FUNCT__ 2862 #define __FUNCT__ "MatLoad_MPIAIJ" 2863 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 2864 { 2865 Mat A; 2866 PetscScalar *vals,*svals; 2867 MPI_Comm comm = ((PetscObject)viewer)->comm; 2868 MPI_Status status; 2869 PetscErrorCode ierr; 2870 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz; 2871 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2872 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2873 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2874 PetscInt cend,cstart,n,*rowners; 2875 int fd; 2876 2877 PetscFunctionBegin; 2878 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2879 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2880 if (!rank) { 2881 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2882 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2883 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2884 } 2885 2886 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2887 M = header[1]; N = header[2]; 2888 /* determine ownership of all rows */ 2889 m = M/size + ((M % size) > rank); 2890 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2891 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2892 2893 /* First process needs enough room for process with most rows */ 2894 if (!rank) { 2895 mmax = rowners[1]; 2896 for (i=2; i<size; i++) { 2897 mmax = PetscMax(mmax,rowners[i]); 2898 } 2899 } else mmax = m; 2900 2901 rowners[0] = 0; 2902 for (i=2; i<=size; i++) { 2903 rowners[i] += rowners[i-1]; 2904 } 2905 rstart = rowners[rank]; 2906 rend = rowners[rank+1]; 2907 2908 /* distribute row lengths to all processors */ 2909 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2910 if (!rank) { 2911 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2912 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2913 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2914 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2915 for (j=0; j<m; j++) { 2916 procsnz[0] += ourlens[j]; 2917 } 2918 for (i=1; i<size; i++) { 2919 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2920 /* calculate 
the number of nonzeros on each processor */ 2921 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2922 procsnz[i] += rowlengths[j]; 2923 } 2924 mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]); 2925 ierr = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2926 } 2927 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2928 } else { 2929 mpicnt = PetscMPIIntCast(m);CHKERRQ(ierr); 2930 ierr = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2931 } 2932 2933 if (!rank) { 2934 /* determine max buffer needed and allocate it */ 2935 maxnz = 0; 2936 for (i=0; i<size; i++) { 2937 maxnz = PetscMax(maxnz,procsnz[i]); 2938 } 2939 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2940 2941 /* read in my part of the matrix column indices */ 2942 nz = procsnz[0]; 2943 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2944 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2945 2946 /* read in every one elses and ship off */ 2947 for (i=1; i<size; i++) { 2948 nz = procsnz[i]; 2949 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2950 mpicnt = PetscMPIIntCast(nz); 2951 ierr = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2952 } 2953 ierr = PetscFree(cols);CHKERRQ(ierr); 2954 } else { 2955 /* determine buffer space needed for message */ 2956 nz = 0; 2957 for (i=0; i<m; i++) { 2958 nz += ourlens[i]; 2959 } 2960 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2961 2962 /* receive message of column indices*/ 2963 mpicnt = PetscMPIIntCast(nz);CHKERRQ(ierr); 2964 ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2965 ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr); 2966 if (mpimaxnz == MPI_UNDEFINED) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt); 2967 else if (mpimaxnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt); 2968 else if (mpimaxnz != mpicnt) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz); 2969 } 2970 2971 /* determine column ownership if matrix is not square */ 2972 if (N != M) { 2973 n = N/size + ((N % size) > rank); 2974 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2975 cstart = cend - n; 2976 } else { 2977 cstart = rstart; 2978 cend = rend; 2979 n = cend - cstart; 2980 } 2981 2982 /* loop over local rows, determining number of off diagonal entries */ 2983 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2984 jj = 0; 2985 for (i=0; i<m; i++) { 2986 for (j=0; j<ourlens[i]; j++) { 2987 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2988 jj++; 2989 } 2990 } 2991 2992 /* create our matrix */ 2993 for (i=0; i<m; i++) { 2994 ourlens[i] -= offlens[i]; 2995 } 2996 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2997 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2998 ierr = MatSetType(A,type);CHKERRQ(ierr); 2999 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 3000 3001 for (i=0; i<m; i++) { 3002 ourlens[i] += offlens[i]; 3003 } 3004 3005 if (!rank) { 3006 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3007 3008 /* read in my part of the matrix numerical values */ 3009 nz = procsnz[0]; 3010 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3011 3012 /* insert into matrix */ 3013 jj = rstart; 3014 smycols = mycols; 3015 svals = vals; 3016 for (i=0; i<m; i++) { 3017 ierr = 
MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3018 smycols += ourlens[i]; 3019 svals += ourlens[i]; 3020 jj++; 3021 } 3022 3023 /* read in other processors and ship out */ 3024 for (i=1; i<size; i++) { 3025 nz = procsnz[i]; 3026 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3027 mpicnt = PetscMPIIntCast(nz); 3028 ierr = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 3029 } 3030 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3031 } else { 3032 /* receive numeric values */ 3033 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3034 3035 /* receive message of values*/ 3036 mpicnt = PetscMPIIntCast(nz); 3037 ierr = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 3038 ierr = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr); 3039 if (mpimaxnz == MPI_UNDEFINED) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt); 3040 else if (mpimaxnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt); 3041 else if (mpimaxnz != mpicnt) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz); 3042 3043 /* insert into matrix */ 3044 jj = rstart; 3045 smycols = mycols; 3046 svals = vals; 3047 for (i=0; i<m; i++) { 3048 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3049 smycols += ourlens[i]; 3050 svals += ourlens[i]; 3051 jj++; 3052 } 3053 } 3054 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3055 ierr = PetscFree(vals);CHKERRQ(ierr); 3056 ierr = PetscFree(mycols);CHKERRQ(ierr); 3057 ierr = PetscFree(rowners);CHKERRQ(ierr); 3058 3059 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3060 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3061 *newmat = A; 3062 PetscFunctionReturn(0); 3063 } 3064 3065 #undef __FUNCT__ 3066 #define __FUNCT__ "MatLoadnew_MPIAIJ" 3067 PetscErrorCode MatLoadnew_MPIAIJ(PetscViewer viewer, Mat newMat) 3068 { 3069 PetscScalar *vals,*svals; 3070 MPI_Comm comm = ((PetscObject)viewer)->comm; 3071 MPI_Status status; 3072 PetscErrorCode ierr; 3073 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz; 3074 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3075 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3076 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3077 PetscInt cend,cstart,n,*rowners,sizesset=1; 3078 int fd; 3079 3080 PetscFunctionBegin; 3081 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3082 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3083 if (!rank) { 3084 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3085 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3086 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3087 } 3088 3089 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3090 3091 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3092 M = header[1]; N = header[2]; 3093 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3094 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3095 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3096 3097 /* If global sizes are set, check if they are 
consistent with that given in the file */ 3098 if (sizesset) { 3099 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3100 } 3101 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Incostintent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3102 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Incostintent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3103 3104 /* determine ownership of all rows */ 3105 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 3106 else m = newMat->rmap->n; /* Set by user */ 3107 3108 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3109 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3110 3111 /* First process needs enough room for process with most rows */ 3112 if (!rank) { 3113 mmax = rowners[1]; 3114 for (i=2; i<size; i++) { 3115 mmax = PetscMax(mmax,rowners[i]); 3116 } 3117 } else mmax = m; 3118 3119 rowners[0] = 0; 3120 for (i=2; i<=size; i++) { 3121 rowners[i] += rowners[i-1]; 3122 } 3123 rstart = rowners[rank]; 3124 rend = rowners[rank+1]; 3125 3126 /* distribute row lengths to all processors */ 3127 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3128 if (!rank) { 3129 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3130 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3131 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3132 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3133 for (j=0; j<m; j++) { 3134 procsnz[0] += ourlens[j]; 3135 } 3136 for (i=1; i<size; i++) { 3137 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3138 /* calculate the number of nonzeros on each processor */ 3139 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3140 procsnz[i] += rowlengths[j]; 3141 } 3142 mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]); 3143 ierr = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3144 } 3145 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3146 } else { 3147 mpicnt = PetscMPIIntCast(m);CHKERRQ(ierr); 3148 ierr = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3149 } 3150 3151 if (!rank) { 3152 /* determine max buffer needed and allocate it */ 3153 maxnz = 0; 3154 for (i=0; i<size; i++) { 3155 maxnz = PetscMax(maxnz,procsnz[i]); 3156 } 3157 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3158 3159 /* read in my part of the matrix column indices */ 3160 nz = procsnz[0]; 3161 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3162 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3163 3164 /* read in every one elses and ship off */ 3165 for (i=1; i<size; i++) { 3166 nz = procsnz[i]; 3167 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3168 mpicnt = PetscMPIIntCast(nz); 3169 ierr = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3170 } 3171 ierr = PetscFree(cols);CHKERRQ(ierr); 3172 } else { 3173 /* determine buffer space needed for message */ 3174 nz = 0; 3175 for (i=0; i<m; i++) { 3176 nz += ourlens[i]; 3177 } 3178 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3179 3180 /* receive message of column indices*/ 3181 mpicnt = PetscMPIIntCast(nz);CHKERRQ(ierr); 3182 ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3183 ierr = 
MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr); 3184 if (mpimaxnz == MPI_UNDEFINED) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt); 3185 else if (mpimaxnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt); 3186 else if (mpimaxnz != mpicnt) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz); 3187 } 3188 3189 /* determine column ownership if matrix is not square */ 3190 if (N != M) { 3191 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3192 else n = newMat->cmap->n; 3193 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3194 cstart = cend - n; 3195 } else { 3196 cstart = rstart; 3197 cend = rend; 3198 n = cend - cstart; 3199 } 3200 3201 /* loop over local rows, determining number of off diagonal entries */ 3202 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3203 jj = 0; 3204 for (i=0; i<m; i++) { 3205 for (j=0; j<ourlens[i]; j++) { 3206 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3207 jj++; 3208 } 3209 } 3210 3211 for (i=0; i<m; i++) { 3212 ourlens[i] -= offlens[i]; 3213 } 3214 if (!sizesset) { 3215 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3216 } 3217 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3218 3219 for (i=0; i<m; i++) { 3220 ourlens[i] += offlens[i]; 3221 } 3222 3223 if (!rank) { 3224 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3225 3226 /* read in my part of the matrix numerical values */ 3227 nz = procsnz[0]; 3228 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3229 3230 /* insert into matrix */ 3231 jj = rstart; 3232 smycols = mycols; 3233 svals = vals; 3234 for (i=0; i<m; i++) { 3235 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3236 smycols += ourlens[i]; 3237 svals += ourlens[i]; 3238 jj++; 3239 } 3240 3241 /* read in other processors and ship out */ 3242 for (i=1; i<size; i++) { 3243 nz = procsnz[i]; 3244 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3245 mpicnt = PetscMPIIntCast(nz); 3246 ierr = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3247 } 3248 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3249 } else { 3250 /* receive numeric values */ 3251 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3252 3253 /* receive message of values*/ 3254 mpicnt = PetscMPIIntCast(nz); 3255 ierr = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm,&status);CHKERRQ(ierr); 3256 ierr = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr); 3257 if (mpimaxnz == MPI_UNDEFINED) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt); 3258 else if (mpimaxnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt); 3259 else if (mpimaxnz != mpicnt) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz); 3260 3261 /* insert into matrix */ 3262 jj = rstart; 3263 smycols = mycols; 3264 svals = vals; 3265 for (i=0; i<m; i++) { 3266 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3267 smycols += ourlens[i]; 3268 svals += ourlens[i]; 3269 jj++; 3270 } 3271 } 3272 ierr = 
PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3273 ierr = PetscFree(vals);CHKERRQ(ierr); 3274 ierr = PetscFree(mycols);CHKERRQ(ierr); 3275 ierr = PetscFree(rowners);CHKERRQ(ierr); 3276 3277 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3278 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3279 PetscFunctionReturn(0); 3280 } 3281 3282 #undef __FUNCT__ 3283 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3284 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3285 { 3286 PetscErrorCode ierr; 3287 IS iscol_local; 3288 PetscInt csize; 3289 3290 PetscFunctionBegin; 3291 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3292 if (call == MAT_REUSE_MATRIX) { 3293 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3294 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3295 } else { 3296 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3297 } 3298 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3299 if (call == MAT_INITIAL_MATRIX) { 3300 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3301 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 3302 } 3303 PetscFunctionReturn(0); 3304 } 3305 3306 #undef __FUNCT__ 3307 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3308 /* 3309 Not great since it makes two copies of the submatrix, first an SeqAIJ 3310 in local and then by concatenating the local matrices the end result. 3311 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3312 3313 Note: This requires a sequential iscol with all indices. 3314 */ 3315 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3316 { 3317 PetscErrorCode ierr; 3318 PetscMPIInt rank,size; 3319 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3320 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3321 Mat *local,M,Mreuse; 3322 MatScalar *vwork,*aa; 3323 MPI_Comm comm = ((PetscObject)mat)->comm; 3324 Mat_SeqAIJ *aij; 3325 3326 3327 PetscFunctionBegin; 3328 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3329 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3330 3331 if (call == MAT_REUSE_MATRIX) { 3332 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3333 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3334 local = &Mreuse; 3335 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3336 } else { 3337 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3338 Mreuse = *local; 3339 ierr = PetscFree(local);CHKERRQ(ierr); 3340 } 3341 3342 /* 3343 m - number of local rows 3344 n - number of columns (same on all processors) 3345 rstart - first row in new global matrix generated 3346 */ 3347 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3348 if (call == MAT_INITIAL_MATRIX) { 3349 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3350 ii = aij->i; 3351 jj = aij->j; 3352 3353 /* 3354 Determine the number of non-zeros in the diagonal and off-diagonal 3355 portions of the matrix in order to do correct preallocation 3356 */ 3357 3358 /* first get start and end of "diagonal" columns */ 3359 if (csize == PETSC_DECIDE) { 3360 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3361 if (mglobal == n) { /* square 
matrix */ 3362 nlocal = m; 3363 } else { 3364 nlocal = n/size + ((n % size) > rank); 3365 } 3366 } else { 3367 nlocal = csize; 3368 } 3369 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3370 rstart = rend - nlocal; 3371 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3372 3373 /* next, compute all the lengths */ 3374 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3375 olens = dlens + m; 3376 for (i=0; i<m; i++) { 3377 jend = ii[i+1] - ii[i]; 3378 olen = 0; 3379 dlen = 0; 3380 for (j=0; j<jend; j++) { 3381 if (*jj < rstart || *jj >= rend) olen++; 3382 else dlen++; 3383 jj++; 3384 } 3385 olens[i] = olen; 3386 dlens[i] = dlen; 3387 } 3388 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3389 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3390 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3391 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3392 ierr = PetscFree(dlens);CHKERRQ(ierr); 3393 } else { 3394 PetscInt ml,nl; 3395 3396 M = *newmat; 3397 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3398 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3399 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3400 /* 3401 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3402 rather than the slower MatSetValues(). 3403 */ 3404 M->was_assembled = PETSC_TRUE; 3405 M->assembled = PETSC_FALSE; 3406 } 3407 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3408 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3409 ii = aij->i; 3410 jj = aij->j; 3411 aa = aij->a; 3412 for (i=0; i<m; i++) { 3413 row = rstart + i; 3414 nz = ii[i+1] - ii[i]; 3415 cwork = jj; jj += nz; 3416 vwork = aa; aa += nz; 3417 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3418 } 3419 3420 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3421 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3422 *newmat = M; 3423 3424 /* save submatrix used in processor for next request */ 3425 if (call == MAT_INITIAL_MATRIX) { 3426 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3427 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 3428 } 3429 3430 PetscFunctionReturn(0); 3431 } 3432 3433 EXTERN_C_BEGIN 3434 #undef __FUNCT__ 3435 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3436 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3437 { 3438 PetscInt m,cstart, cend,j,nnz,i,d; 3439 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3440 const PetscInt *JJ; 3441 PetscScalar *values; 3442 PetscErrorCode ierr; 3443 3444 PetscFunctionBegin; 3445 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3446 3447 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3448 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3449 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3450 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3451 m = B->rmap->n; 3452 cstart = B->cmap->rstart; 3453 cend = B->cmap->rend; 3454 rstart = B->rmap->rstart; 3455 3456 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3457 3458 #if defined(PETSC_USE_DEBUGGING) 3459 for (i=0; i<m; i++) { 3460 nnz = Ii[i+1]- Ii[i]; 3461 JJ = J + Ii[i]; 3462 if (nnz 
< 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz); 3463 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3464 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3465 } 3466 #endif 3467 3468 for (i=0; i<m; i++) { 3469 nnz = Ii[i+1]- Ii[i]; 3470 JJ = J + Ii[i]; 3471 nnz_max = PetscMax(nnz_max,nnz); 3472 d = 0; 3473 for (j=0; j<nnz; j++) { 3474 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3475 } 3476 d_nnz[i] = d; 3477 o_nnz[i] = nnz - d; 3478 } 3479 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3480 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3481 3482 if (v) values = (PetscScalar*)v; 3483 else { 3484 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3485 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3486 } 3487 3488 for (i=0; i<m; i++) { 3489 ii = i + rstart; 3490 nnz = Ii[i+1]- Ii[i]; 3491 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3492 } 3493 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3494 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3495 3496 if (!v) { 3497 ierr = PetscFree(values);CHKERRQ(ierr); 3498 } 3499 PetscFunctionReturn(0); 3500 } 3501 EXTERN_C_END 3502 3503 #undef __FUNCT__ 3504 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3505 /*@ 3506 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3507 (the default parallel PETSc format). 3508 3509 Collective on MPI_Comm 3510 3511 Input Parameters: 3512 + B - the matrix 3513 . i - the indices into j for the start of each local row (starts with zero) 3514 . j - the column indices for each local row (starts with zero) 3515 - v - optional values in the matrix 3516 3517 Level: developer 3518 3519 Notes: 3520 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3521 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3522 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3523 3524 The i and j indices are 0 based, and the i indices are offsets into the local j array. 3525 3526 The format used for the sparse matrix input is equivalent to a 3527 row-major ordering,
i.e. for the following matrix, the input data expected is 3528 as shown: 3529 3530 1 0 0 3531 2 0 3 P0 3532 ------- 3533 4 5 6 P1 3534 3535 Process0 [P0]: rows_owned=[0,1] 3536 i = {0,1,3} [size = nrow+1 = 2+1] 3537 j = {0,0,2} [size = nz = 3] 3538 v = {1,2,3} [size = nz = 3] 3539 3540 Process1 [P1]: rows_owned=[2] 3541 i = {0,3} [size = nrow+1 = 1+1] 3542 j = {0,1,2} [size = nz = 3] 3543 v = {4,5,6} [size = nz = 3] 3544 3545 .keywords: matrix, aij, compressed row, sparse, parallel 3546 3547 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3548 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3549 @*/ 3550 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3551 { 3552 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 3553 3554 PetscFunctionBegin; 3555 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3556 if (f) { 3557 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 3558 } 3559 PetscFunctionReturn(0); 3560 } 3561 3562 #undef __FUNCT__ 3563 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3564 /*@C 3565 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3566 (the default parallel PETSc format). For good matrix assembly performance 3567 the user should preallocate the matrix storage by setting the parameters 3568 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3569 performance can be increased by more than a factor of 50. 3570 3571 Collective on MPI_Comm 3572 3573 Input Parameters: 3574 + A - the matrix 3575 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3576 (same value is used for all local rows) 3577 . d_nnz - array containing the number of nonzeros in the various rows of the 3578 DIAGONAL portion of the local submatrix (possibly different for each row) 3579 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3580 The size of this array is equal to the number of local rows, i.e. 'm'. 3581 You must leave room for the diagonal entry even if it is zero. 3582 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3583 submatrix (same value is used for all local rows). 3584 - o_nnz - array containing the number of nonzeros in the various rows of the 3585 OFF-DIAGONAL portion of the local submatrix (possibly different for 3586 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3587 structure. The size of this array is equal to the number 3588 of local rows, i.e. 'm'. 3589 3590 If the *_nnz parameter is given then the *_nz parameter is ignored. 3591 3592 The AIJ format (also called the Yale sparse matrix format or 3593 compressed row storage (CSR)) is fully compatible with standard Fortran 77 3594 storage. The stored row and column indices begin with zero. See the users manual for details. 3595 3596 The parallel matrix is partitioned such that the first m0 rows belong to 3597 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3598 to process 2 etc., where m0,m1,m2,... are the input parameter 'm'.
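   As a rough sketch (error checking abbreviated; ierr and comm are assumed to be declared as usual,
   and the m, d_nnz, and o_nnz values shown are only placeholders that a real code would compute
   from its own sparsity pattern), a typical preallocation sequence looks like
.vb
   Mat      A;
   PetscInt m = 3;
   PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
   ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
   followed by the usual MatSetValues() and MatAssemblyBegin()/MatAssemblyEnd() calls.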
3599 3600 The DIAGONAL portion of the local submatrix of a processor can be defined 3601 as the submatrix which is obtained by extracting the part corresponding 3602 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 3603 first row that belongs to the processor, and r2 is the last row belonging 3604 to this processor. This is a square m x m matrix. The remaining portion 3605 of the local submatrix (m x (N-m)) constitutes the OFF-DIAGONAL portion. 3606 3607 If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored. 3608 3609 You can call MatGetInfo() to get information on how effective the preallocation was; 3610 for example the fields mallocs, nz_allocated, nz_used, nz_unneeded. 3611 You can also run with the option -info and look for messages with the string 3612 malloc in them to see if additional memory allocation was needed. 3613 3614 Example usage: 3615 3616 Consider the following 8x8 matrix with 34 non-zero values, that is 3617 assembled across 3 processors. Let us assume that proc0 owns 3 rows, 3618 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3619 as follows: 3620 3621 .vb 3622 1 2 0 | 0 3 0 | 0 4 3623 Proc0 0 5 6 | 7 0 0 | 8 0 3624 9 0 10 | 11 0 0 | 12 0 3625 ------------------------------------- 3626 13 0 14 | 15 16 17 | 0 0 3627 Proc1 0 18 0 | 19 20 21 | 0 0 3628 0 0 0 | 22 23 0 | 24 0 3629 ------------------------------------- 3630 Proc2 25 26 27 | 0 0 28 | 29 0 3631 30 0 0 | 31 32 33 | 0 34 3632 .ve 3633 3634 This can be represented as a collection of submatrices as: 3635 3636 .vb 3637 A B C 3638 D E F 3639 G H I 3640 .ve 3641 3642 Where the submatrices A,B,C are owned by proc0, D,E,F are 3643 owned by proc1, G,H,I are owned by proc2. 3644 3645 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3646 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3647 The 'M','N' parameters are 8,8, and have the same values on all procs. 3648 3649 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3650 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3651 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3652 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 3653 part as SeqAIJ matrices; e.g., proc1 will store [E] as a SeqAIJ 3654 matrix, and [DF] as another SeqAIJ matrix. 3655 3656 When the d_nz, o_nz parameters are specified, d_nz storage elements are 3657 allocated for every row of the local diagonal submatrix, and o_nz 3658 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3659 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 3660 row in each of the local DIAGONAL and OFF-DIAGONAL submatrices. 3661 In this case, the values of d_nz,o_nz are: 3662 .vb 3663 proc0 : d_nz = 2, o_nz = 2 3664 proc1 : d_nz = 3, o_nz = 2 3665 proc2 : d_nz = 1, o_nz = 4 3666 .ve 3667 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3668 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3669 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3670 34 values. 3671 3672 When the d_nnz, o_nnz parameters are specified, the storage is specified 3673 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
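   (For instance, a local row with 5 nonzeros, 3 of which fall in the columns of the DIAGONAL
   block, would be described by d_nnz[row] = 3 and o_nnz[row] = 2.)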
3674 In the above case the values for d_nnz,o_nnz are: 3675 .vb 3676 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3677 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3678 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3679 .ve 3680 Here the space allocated is sum of all the above values i.e 34, and 3681 hence pre-allocation is perfect. 3682 3683 Level: intermediate 3684 3685 .keywords: matrix, aij, compressed row, sparse, parallel 3686 3687 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3688 MPIAIJ, MatGetInfo() 3689 @*/ 3690 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3691 { 3692 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3693 3694 PetscFunctionBegin; 3695 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3696 if (f) { 3697 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3698 } 3699 PetscFunctionReturn(0); 3700 } 3701 3702 #undef __FUNCT__ 3703 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3704 /*@ 3705 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3706 CSR format the local rows. 3707 3708 Collective on MPI_Comm 3709 3710 Input Parameters: 3711 + comm - MPI communicator 3712 . m - number of local rows (Cannot be PETSC_DECIDE) 3713 . n - This value should be the same as the local size used in creating the 3714 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3715 calculated if N is given) For square matrices n is almost always m. 3716 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3717 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3718 . i - row indices 3719 . j - column indices 3720 - a - matrix values 3721 3722 Output Parameter: 3723 . mat - the matrix 3724 3725 Level: intermediate 3726 3727 Notes: 3728 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3729 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3730 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3731 3732 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3733 3734 The format which is used for the sparse matrix input, is equivalent to a 3735 row-major ordering.. 
i.e. for the following matrix, the input data expected is 3736 as shown: 3737 3738 1 0 0 3739 2 0 3 P0 3740 ------- 3741 4 5 6 P1 3742 3743 Process0 [P0]: rows_owned=[0,1] 3744 i = {0,1,3} [size = nrow+1 = 2+1] 3745 j = {0,0,2} [size = nz = 3] 3746 v = {1,2,3} [size = nz = 3] 3747 3748 Process1 [P1]: rows_owned=[2] 3749 i = {0,3} [size = nrow+1 = 1+1] 3750 j = {0,1,2} [size = nz = 3] 3751 v = {4,5,6} [size = nz = 3] 3752 3753 .keywords: matrix, aij, compressed row, sparse, parallel 3754 3755 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3756 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3757 @*/ 3758 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3759 { 3760 PetscErrorCode ierr; 3761 3762 PetscFunctionBegin; 3763 if (i[0]) { 3764 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3765 } 3766 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative"); 3767 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3768 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3769 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3770 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3771 PetscFunctionReturn(0); 3772 } 3773 3774 #undef __FUNCT__ 3775 #define __FUNCT__ "MatCreateMPIAIJ" 3776 /*@C 3777 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3778 (the default parallel PETSc format). For good matrix assembly performance 3779 the user should preallocate the matrix storage by setting the parameters 3780 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3781 performance can be increased by more than a factor of 50. 3782 3783 Collective on MPI_Comm 3784 3785 Input Parameters: 3786 + comm - MPI communicator 3787 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3788 This value should be the same as the local size used in creating the 3789 y vector for the matrix-vector product y = Ax. 3790 . n - This value should be the same as the local size used in creating the 3791 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3792 calculated if N is given) For square matrices n is almost always m. 3793 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3794 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3795 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3796 (same value is used for all local rows) 3797 . d_nnz - array containing the number of nonzeros in the various rows of the 3798 DIAGONAL portion of the local submatrix (possibly different for each row) 3799 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3800 The size of this array is equal to the number of local rows, i.e. 'm'. 3801 You must leave room for the diagonal entry even if it is zero. 3802 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3803 submatrix (same value is used for all local rows). 3804 - o_nnz - array containing the number of nonzeros in the various rows of the 3805 OFF-DIAGONAL portion of the local submatrix (possibly different for 3806 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3807 structure. The size of this array is equal to the number 3808 of local rows, i.e. 'm'.
3809 3810 Output Parameter: 3811 . A - the matrix 3812 3813 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3814 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3815 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3816 3817 Notes: 3818 If the *_nnz parameter is given then the *_nz parameter is ignored. 3819 3820 The m,n,M,N parameters specify the size of the matrix, and its partitioning across 3821 processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3822 storage requirements for this matrix. 3823 3824 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3825 processor then it must be used on all processors that share the object for 3826 that argument. 3827 3828 The user MUST specify either the local or global matrix dimensions 3829 (possibly both). 3830 3831 The parallel matrix is partitioned across processors such that the 3832 first m0 rows belong to process 0, the next m1 rows belong to 3833 process 1, the next m2 rows belong to process 2 etc., where 3834 m0,m1,m2,... are the input parameter 'm', i.e. each processor stores 3835 values corresponding to an [m x N] submatrix. 3836 3837 The columns are logically partitioned with the n0 columns belonging 3838 to the 0th partition, the next n1 columns belonging to the next 3839 partition etc., where n0,n1,n2,... are the input parameter 'n'. 3840 3841 The DIAGONAL portion of the local submatrix on any given processor 3842 is the submatrix corresponding to the rows and columns m,n 3843 corresponding to the given processor, i.e. the diagonal matrix on 3844 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1] 3845 etc. The remaining portion of the local submatrix [m x (N-n)] 3846 constitutes the OFF-DIAGONAL portion. The example below better 3847 illustrates this concept. 3848 3849 For a square global matrix we define each processor's diagonal portion 3850 to be its local rows and the corresponding columns (a square submatrix); 3851 each processor's off-diagonal portion encompasses the remainder of the 3852 local matrix (a rectangular submatrix). 3853 3854 If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored. 3855 3856 When calling this routine with a single process communicator, a matrix of 3857 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3858 type of communicator, use the construction mechanism: 3859 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A,m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3860 3861 By default, this format uses inodes (identical nodes) when possible. 3862 We search for consecutive rows with the same nonzero structure, thereby 3863 reusing matrix information to achieve increased efficiency. 3864 3865 Options Database Keys: 3866 + -mat_no_inode - Do not use inodes 3867 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3868 - -mat_aij_oneindex - Internally use indexing starting at 1 3869 rather than 0. Note that when calling MatSetValues(), 3870 the user still MUST index entries starting at 0! 3871 3872 3873 Example usage: 3874 3875 Consider the following 8x8 matrix with 34 non-zero values, that is 3876 assembled across 3 processors. Let us assume that proc0 owns 3 rows, 3877 proc1 owns 3 rows, proc2 owns 2 rows.
This division can be shown 3878 as follows: 3879 3880 .vb 3881 1 2 0 | 0 3 0 | 0 4 3882 Proc0 0 5 6 | 7 0 0 | 8 0 3883 9 0 10 | 11 0 0 | 12 0 3884 ------------------------------------- 3885 13 0 14 | 15 16 17 | 0 0 3886 Proc1 0 18 0 | 19 20 21 | 0 0 3887 0 0 0 | 22 23 0 | 24 0 3888 ------------------------------------- 3889 Proc2 25 26 27 | 0 0 28 | 29 0 3890 30 0 0 | 31 32 33 | 0 34 3891 .ve 3892 3893 This can be represented as a collection of submatrices as: 3894 3895 .vb 3896 A B C 3897 D E F 3898 G H I 3899 .ve 3900 3901 Where the submatrices A,B,C are owned by proc0, D,E,F are 3902 owned by proc1, G,H,I are owned by proc2. 3903 3904 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3905 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3906 The 'M','N' parameters are 8,8, and have the same values on all procs. 3907 3908 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3909 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3910 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3911 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 3912 part as SeqAIJ matrices; e.g., proc1 will store [E] as a SeqAIJ 3913 matrix, and [DF] as another SeqAIJ matrix. 3914 3915 When the d_nz, o_nz parameters are specified, d_nz storage elements are 3916 allocated for every row of the local diagonal submatrix, and o_nz 3917 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3918 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 3919 row in each of the local DIAGONAL and OFF-DIAGONAL submatrices. 3920 In this case, the values of d_nz,o_nz are: 3921 .vb 3922 proc0 : d_nz = 2, o_nz = 2 3923 proc1 : d_nz = 3, o_nz = 2 3924 proc2 : d_nz = 1, o_nz = 4 3925 .ve 3926 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3927 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3928 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3929 34 values. 3930 3931 When the d_nnz, o_nnz parameters are specified, the storage is specified 3932 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 3933 In the above case the values for d_nnz,o_nnz are: 3934 .vb 3935 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3936 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3937 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3938 .ve 3939 Here the space allocated is the sum of all the above values, i.e. 34, and 3940 hence the pre-allocation is perfect.
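   As a rough sketch of the corresponding call for the example above, shown for the process that
   owns the first three rows (the other processes would pass their own m, n, and d_nnz/o_nnz
   values as listed above; A, ierr, and comm are assumed to be declared as usual):
.vb
   PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
   ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve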
3941 3942 Level: intermediate 3943 3944 .keywords: matrix, aij, compressed row, sparse, parallel 3945 3946 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3947 MPIAIJ, MatCreateMPIAIJWithArrays() 3948 @*/ 3949 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3950 { 3951 PetscErrorCode ierr; 3952 PetscMPIInt size; 3953 3954 PetscFunctionBegin; 3955 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3956 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3957 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3958 if (size > 1) { 3959 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3960 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3961 } else { 3962 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3963 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3964 } 3965 PetscFunctionReturn(0); 3966 } 3967 3968 #undef __FUNCT__ 3969 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3970 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3971 { 3972 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3973 3974 PetscFunctionBegin; 3975 *Ad = a->A; 3976 *Ao = a->B; 3977 *colmap = a->garray; 3978 PetscFunctionReturn(0); 3979 } 3980 3981 #undef __FUNCT__ 3982 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3983 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3984 { 3985 PetscErrorCode ierr; 3986 PetscInt i; 3987 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3988 3989 PetscFunctionBegin; 3990 if (coloring->ctype == IS_COLORING_GLOBAL) { 3991 ISColoringValue *allcolors,*colors; 3992 ISColoring ocoloring; 3993 3994 /* set coloring for diagonal portion */ 3995 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3996 3997 /* set coloring for off-diagonal portion */ 3998 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3999 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4000 for (i=0; i<a->B->cmap->n; i++) { 4001 colors[i] = allcolors[a->garray[i]]; 4002 } 4003 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4004 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4005 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4006 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 4007 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4008 ISColoringValue *colors; 4009 PetscInt *larray; 4010 ISColoring ocoloring; 4011 4012 /* set coloring for diagonal portion */ 4013 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4014 for (i=0; i<a->A->cmap->n; i++) { 4015 larray[i] = i + A->cmap->rstart; 4016 } 4017 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4018 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4019 for (i=0; i<a->A->cmap->n; i++) { 4020 colors[i] = coloring->colors[larray[i]]; 4021 } 4022 ierr = PetscFree(larray);CHKERRQ(ierr); 4023 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4024 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4025 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 4026 4027 /* set coloring for off-diagonal portion */ 4028 ierr = 
PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4029 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4030 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4031 for (i=0; i<a->B->cmap->n; i++) { 4032 colors[i] = coloring->colors[larray[i]]; 4033 } 4034 ierr = PetscFree(larray);CHKERRQ(ierr); 4035 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4036 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4037 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 4038 } else { 4039 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4040 } 4041 4042 PetscFunctionReturn(0); 4043 } 4044 4045 #if defined(PETSC_HAVE_ADIC) 4046 #undef __FUNCT__ 4047 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4048 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4049 { 4050 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4051 PetscErrorCode ierr; 4052 4053 PetscFunctionBegin; 4054 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4055 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4056 PetscFunctionReturn(0); 4057 } 4058 #endif 4059 4060 #undef __FUNCT__ 4061 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4062 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4063 { 4064 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4065 PetscErrorCode ierr; 4066 4067 PetscFunctionBegin; 4068 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4069 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4070 PetscFunctionReturn(0); 4071 } 4072 4073 #undef __FUNCT__ 4074 #define __FUNCT__ "MatMerge" 4075 /*@ 4076 MatMerge - Creates a single large PETSc matrix by concatinating sequential 4077 matrices from each processor 4078 4079 Collective on MPI_Comm 4080 4081 Input Parameters: 4082 + comm - the communicators the parallel matrix will live on 4083 . inmat - the input sequential matrices 4084 . n - number of local columns (or PETSC_DECIDE) 4085 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4086 4087 Output Parameter: 4088 . outmat - the parallel matrix generated 4089 4090 Level: advanced 4091 4092 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
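   A rough usage sketch (error checking abbreviated; seqA is assumed to be a sequential AIJ matrix
   holding this process's rows, and it is destroyed by the call since the routine takes ownership
   of the input matrix):
.vb
   Mat C;
   ierr = MatMerge(comm,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
.ve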
4093 4094 @*/ 4095 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4096 { 4097 PetscErrorCode ierr; 4098 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 4099 PetscInt *indx; 4100 PetscScalar *values; 4101 4102 PetscFunctionBegin; 4103 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4104 if (scall == MAT_INITIAL_MATRIX){ 4105 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 4106 if (n == PETSC_DECIDE){ 4107 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4108 } 4109 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4110 rstart -= m; 4111 4112 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4113 for (i=0;i<m;i++) { 4114 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4115 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4116 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4117 } 4118 /* This routine will ONLY return MPIAIJ type matrix */ 4119 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4120 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4121 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4122 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4123 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4124 4125 } else if (scall == MAT_REUSE_MATRIX){ 4126 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4127 } else { 4128 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4129 } 4130 4131 for (i=0;i<m;i++) { 4132 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4133 Ii = i + rstart; 4134 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4135 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4136 } 4137 ierr = MatDestroy(inmat);CHKERRQ(ierr); 4138 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4139 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4140 4141 PetscFunctionReturn(0); 4142 } 4143 4144 #undef __FUNCT__ 4145 #define __FUNCT__ "MatFileSplit" 4146 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4147 { 4148 PetscErrorCode ierr; 4149 PetscMPIInt rank; 4150 PetscInt m,N,i,rstart,nnz; 4151 size_t len; 4152 const PetscInt *indx; 4153 PetscViewer out; 4154 char *name; 4155 Mat B; 4156 const PetscScalar *values; 4157 4158 PetscFunctionBegin; 4159 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4160 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4161 /* Should this be the type of the diagonal block of A? 
*/ 4162 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4163 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4164 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4165 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4166 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4167 for (i=0;i<m;i++) { 4168 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4169 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4170 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4171 } 4172 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4173 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4174 4175 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4176 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4177 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4178 sprintf(name,"%s.%d",outfile,rank); 4179 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4180 ierr = PetscFree(name); 4181 ierr = MatView(B,out);CHKERRQ(ierr); 4182 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 4183 ierr = MatDestroy(B);CHKERRQ(ierr); 4184 PetscFunctionReturn(0); 4185 } 4186 4187 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 4188 #undef __FUNCT__ 4189 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4190 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4191 { 4192 PetscErrorCode ierr; 4193 Mat_Merge_SeqsToMPI *merge; 4194 PetscContainer container; 4195 4196 PetscFunctionBegin; 4197 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4198 if (container) { 4199 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4200 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4201 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4202 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4203 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4204 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4205 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4206 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4207 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4208 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4209 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4210 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4211 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4212 ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr); 4213 4214 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 4215 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4216 } 4217 ierr = PetscFree(merge);CHKERRQ(ierr); 4218 4219 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4220 PetscFunctionReturn(0); 4221 } 4222 4223 #include "../src/mat/utils/freespace.h" 4224 #include "petscbt.h" 4225 4226 #undef __FUNCT__ 4227 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4228 /*@C 4229 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4230 matrices from each processor 4231 4232 Collective on MPI_Comm 4233 4234 Input Parameters: 4235 + comm - the communicators the parallel matrix will live on 4236 . seqmat - the input sequential matrices 4237 . m - number of local rows (or PETSC_DECIDE) 4238 . n - number of local columns (or PETSC_DECIDE) 4239 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4240 4241 Output Parameter: 4242 . mpimat - the parallel matrix generated 4243 4244 Level: advanced 4245 4246 Notes: 4247 The dimensions of the sequential matrix in each processor MUST be the same. 
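   The symbolic and numeric phases are invoked separately; a rough sketch of the pair of calls
   (assuming seqmat holds this process's contribution and m,n are the desired local sizes):
.vb
   Mat C;
   ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&C);CHKERRQ(ierr);
   ierr = MatMerge_SeqsToMPINumeric(seqmat,C);CHKERRQ(ierr);
.ve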
4248 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4249 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4250 @*/ 4251 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4252 { 4253 PetscErrorCode ierr; 4254 MPI_Comm comm=((PetscObject)mpimat)->comm; 4255 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4256 PetscMPIInt size,rank,taga,*len_s; 4257 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4258 PetscInt proc,m; 4259 PetscInt **buf_ri,**buf_rj; 4260 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4261 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4262 MPI_Request *s_waits,*r_waits; 4263 MPI_Status *status; 4264 MatScalar *aa=a->a; 4265 MatScalar **abuf_r,*ba_i; 4266 Mat_Merge_SeqsToMPI *merge; 4267 PetscContainer container; 4268 4269 PetscFunctionBegin; 4270 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4271 4272 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4273 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4274 4275 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4276 if (container) { 4277 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4278 } 4279 bi = merge->bi; 4280 bj = merge->bj; 4281 buf_ri = merge->buf_ri; 4282 buf_rj = merge->buf_rj; 4283 4284 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4285 owners = merge->rowmap->range; 4286 len_s = merge->len_s; 4287 4288 /* send and recv matrix values */ 4289 /*-----------------------------*/ 4290 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4291 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4292 4293 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4294 for (proc=0,k=0; proc<size; proc++){ 4295 if (!len_s[proc]) continue; 4296 i = owners[proc]; 4297 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4298 k++; 4299 } 4300 4301 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4302 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4303 ierr = PetscFree(status);CHKERRQ(ierr); 4304 4305 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4306 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4307 4308 /* insert mat values of mpimat */ 4309 /*----------------------------*/ 4310 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4311 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4312 4313 for (k=0; k<merge->nrecv; k++){ 4314 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4315 nrows = *(buf_ri_k[k]); 4316 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4317 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4318 } 4319 4320 /* set values of ba */ 4321 m = merge->rowmap->n; 4322 for (i=0; i<m; i++) { 4323 arow = owners[rank] + i; 4324 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4325 bnzi = bi[i+1] - bi[i]; 4326 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4327 4328 /* add local non-zero vals of this proc's seqmat into ba */ 4329 anzi = ai[arow+1] - ai[arow]; 4330 aj = a->j + ai[arow]; 4331 aa = a->a + ai[arow]; 4332 nextaj = 0; 4333 for (j=0; nextaj<anzi; j++){ 4334 if (*(bj_i 
+ j) == aj[nextaj]){ /* bcol == acol */ 4335 ba_i[j] += aa[nextaj++]; 4336 } 4337 } 4338 4339 /* add received vals into ba */ 4340 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4341 /* i-th row */ 4342 if (i == *nextrow[k]) { 4343 anzi = *(nextai[k]+1) - *nextai[k]; 4344 aj = buf_rj[k] + *(nextai[k]); 4345 aa = abuf_r[k] + *(nextai[k]); 4346 nextaj = 0; 4347 for (j=0; nextaj<anzi; j++){ 4348 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4349 ba_i[j] += aa[nextaj++]; 4350 } 4351 } 4352 nextrow[k]++; nextai[k]++; 4353 } 4354 } 4355 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4356 } 4357 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4358 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4359 4360 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4361 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4362 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4363 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4364 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4365 PetscFunctionReturn(0); 4366 } 4367 4368 #undef __FUNCT__ 4369 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4370 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4371 { 4372 PetscErrorCode ierr; 4373 Mat B_mpi; 4374 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4375 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4376 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4377 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4378 PetscInt len,proc,*dnz,*onz; 4379 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4380 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4381 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4382 MPI_Status *status; 4383 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4384 PetscBT lnkbt; 4385 Mat_Merge_SeqsToMPI *merge; 4386 PetscContainer container; 4387 4388 PetscFunctionBegin; 4389 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4390 4391 /* make sure it is a PETSc comm */ 4392 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4393 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4394 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4395 4396 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4397 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4398 4399 /* determine row ownership */ 4400 /*---------------------------------------------------------*/ 4401 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4402 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4403 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4404 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4405 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4406 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4407 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4408 4409 m = merge->rowmap->n; 4410 M = merge->rowmap->N; 4411 owners = merge->rowmap->range; 4412 4413 /* determine the number of messages to send, their lengths */ 4414 /*---------------------------------------------------------*/ 4415 len_s = merge->len_s; 4416 4417 len = 0; /* length of buf_si[] */ 4418 merge->nsend = 0; 4419 for (proc=0; proc<size; proc++){ 4420 len_si[proc] = 0; 4421 if (proc == rank){ 4422 len_s[proc] = 0; 4423 } else { 4424 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4425 
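/* len_s[proc] (set on the next line) is the number of nonzero entries this process
         contributes to the rows owned by [proc], i.e. the length of the j-structure (and later
         the numerical value) message; len_si[proc] is recomputed below as 2*(nrows+1), where
         nrows is the number of nonempty rows sent, which is the length of the i-structure message */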
len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4426 } 4427 if (len_s[proc]) { 4428 merge->nsend++; 4429 nrows = 0; 4430 for (i=owners[proc]; i<owners[proc+1]; i++){ 4431 if (ai[i+1] > ai[i]) nrows++; 4432 } 4433 len_si[proc] = 2*(nrows+1); 4434 len += len_si[proc]; 4435 } 4436 } 4437 4438 /* determine the number and length of messages to receive for ij-structure */ 4439 /*-------------------------------------------------------------------------*/ 4440 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4441 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4442 4443 /* post the Irecv of j-structure */ 4444 /*-------------------------------*/ 4445 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4446 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4447 4448 /* post the Isend of j-structure */ 4449 /*--------------------------------*/ 4450 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4451 4452 for (proc=0, k=0; proc<size; proc++){ 4453 if (!len_s[proc]) continue; 4454 i = owners[proc]; 4455 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4456 k++; 4457 } 4458 4459 /* receives and sends of j-structure are complete */ 4460 /*------------------------------------------------*/ 4461 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4462 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4463 4464 /* send and recv i-structure */ 4465 /*---------------------------*/ 4466 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4467 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4468 4469 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4470 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4471 for (proc=0,k=0; proc<size; proc++){ 4472 if (!len_s[proc]) continue; 4473 /* form outgoing message for i-structure: 4474 buf_si[0]: nrows to be sent 4475 [1:nrows]: row index (global) 4476 [nrows+1:2*nrows+1]: i-structure index 4477 */ 4478 /*-------------------------------------------*/ 4479 nrows = len_si[proc]/2 - 1; 4480 buf_si_i = buf_si + nrows+1; 4481 buf_si[0] = nrows; 4482 buf_si_i[0] = 0; 4483 nrows = 0; 4484 for (i=owners[proc]; i<owners[proc+1]; i++){ 4485 anzi = ai[i+1] - ai[i]; 4486 if (anzi) { 4487 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4488 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4489 nrows++; 4490 } 4491 } 4492 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4493 k++; 4494 buf_si += len_si[proc]; 4495 } 4496 4497 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4498 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4499 4500 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4501 for (i=0; i<merge->nrecv; i++){ 4502 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4503 } 4504 4505 ierr = PetscFree(len_si);CHKERRQ(ierr); 4506 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4507 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4508 ierr = 
PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4509 ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4510 ierr = PetscFree(buf_s);CHKERRQ(ierr);
4511 ierr = PetscFree(status);CHKERRQ(ierr);
4512
4513 /* compute a local seq matrix in each processor */
4514 /*----------------------------------------------*/
4515 /* allocate bi array and free space for accumulating nonzero column info */
4516 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4517 bi[0] = 0;
4518
4519 /* create and initialize a linked list */
4520 nlnk = N+1;
4521 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4522
4523 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4524 len = 0;
4525 len = ai[owners[rank+1]] - ai[owners[rank]];
4526 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4527 current_space = free_space;
4528
4529 /* determine symbolic info for each local row */
4530 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4531
4532 for (k=0; k<merge->nrecv; k++){
4533 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4534 nrows = *buf_ri_k[k];
4535 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */
4536 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure */
4537 }
4538
4539 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4540 len = 0;
4541 for (i=0;i<m;i++) {
4542 bnzi = 0;
4543 /* add local non-zero cols of this proc's seqmat into lnk */
4544 arow = owners[rank] + i;
4545 anzi = ai[arow+1] - ai[arow];
4546 aj = a->j + ai[arow];
4547 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4548 bnzi += nlnk;
4549 /* add received col data into lnk */
4550 for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4551 if (i == *nextrow[k]) { /* i-th row */
4552 anzi = *(nextai[k]+1) - *nextai[k];
4553 aj = buf_rj[k] + *nextai[k];
4554 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4555 bnzi += nlnk;
4556 nextrow[k]++; nextai[k]++;
4557 }
4558 }
4559 if (len < bnzi) len = bnzi; /* =max(bnzi) */
4560
4561 /* if free space is not available, make more free space */
4562 if (current_space->local_remaining<bnzi) {
4563 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4564 nspacedouble++;
4565 }
4566 /* copy data into free space, then initialize lnk */
4567 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4568 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4569
4570 current_space->array += bnzi;
4571 current_space->local_used += bnzi;
4572 current_space->local_remaining -= bnzi;
4573
4574 bi[i+1] = bi[i] + bnzi;
4575 }
4576
4577 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4578
4579 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4580 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4581 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4582
4583 /* create symbolic parallel matrix B_mpi */
4584 /*---------------------------------------*/
4585 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4586 if (n==PETSC_DECIDE) {
4587 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4588 } else {
4589 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4590 }
4591 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4592 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4593 ierr =
MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4594
4595 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4596 B_mpi->assembled = PETSC_FALSE;
4597 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4598 merge->bi = bi;
4599 merge->bj = bj;
4600 merge->buf_ri = buf_ri;
4601 merge->buf_rj = buf_rj;
4602 merge->coi = PETSC_NULL;
4603 merge->coj = PETSC_NULL;
4604 merge->owners_co = PETSC_NULL;
4605
4606 /* attach the supporting struct to B_mpi for reuse */
4607 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4608 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4609 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4610 *mpimat = B_mpi;
4611
4612 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4613 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4614 PetscFunctionReturn(0);
4615 }
4616
4617 #undef __FUNCT__
4618 #define __FUNCT__ "MatMerge_SeqsToMPI"
4619 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4620 {
4621 PetscErrorCode ierr;
4622
4623 PetscFunctionBegin;
4624 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4625 if (scall == MAT_INITIAL_MATRIX){
4626 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4627 }
4628 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4629 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4630 PetscFunctionReturn(0);
4631 }
4632
4633 #undef __FUNCT__
4634 #define __FUNCT__ "MatGetLocalMat"
4635 /*@
4636 MatGetLocalMat - Creates a SeqAIJ matrix by taking all the local rows of A
4637
4638 Not Collective
4639
4640 Input Parameters:
4641 + A - the matrix
4642 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4643
4644 Output Parameter:
4645 .
A_loc - the local sequential matrix generated 4646 4647 Level: developer 4648 4649 @*/ 4650 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4651 { 4652 PetscErrorCode ierr; 4653 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4654 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4655 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4656 MatScalar *aa=a->a,*ba=b->a,*cam; 4657 PetscScalar *ca; 4658 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4659 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4660 4661 PetscFunctionBegin; 4662 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4663 if (scall == MAT_INITIAL_MATRIX){ 4664 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4665 ci[0] = 0; 4666 for (i=0; i<am; i++){ 4667 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4668 } 4669 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4670 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4671 k = 0; 4672 for (i=0; i<am; i++) { 4673 ncols_o = bi[i+1] - bi[i]; 4674 ncols_d = ai[i+1] - ai[i]; 4675 /* off-diagonal portion of A */ 4676 for (jo=0; jo<ncols_o; jo++) { 4677 col = cmap[*bj]; 4678 if (col >= cstart) break; 4679 cj[k] = col; bj++; 4680 ca[k++] = *ba++; 4681 } 4682 /* diagonal portion of A */ 4683 for (j=0; j<ncols_d; j++) { 4684 cj[k] = cstart + *aj++; 4685 ca[k++] = *aa++; 4686 } 4687 /* off-diagonal portion of A */ 4688 for (j=jo; j<ncols_o; j++) { 4689 cj[k] = cmap[*bj++]; 4690 ca[k++] = *ba++; 4691 } 4692 } 4693 /* put together the new matrix */ 4694 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4695 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4696 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4697 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4698 mat->free_a = PETSC_TRUE; 4699 mat->free_ij = PETSC_TRUE; 4700 mat->nonew = 0; 4701 } else if (scall == MAT_REUSE_MATRIX){ 4702 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4703 ci = mat->i; cj = mat->j; cam = mat->a; 4704 for (i=0; i<am; i++) { 4705 /* off-diagonal portion of A */ 4706 ncols_o = bi[i+1] - bi[i]; 4707 for (jo=0; jo<ncols_o; jo++) { 4708 col = cmap[*bj]; 4709 if (col >= cstart) break; 4710 *cam++ = *ba++; bj++; 4711 } 4712 /* diagonal portion of A */ 4713 ncols_d = ai[i+1] - ai[i]; 4714 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4715 /* off-diagonal portion of A */ 4716 for (j=jo; j<ncols_o; j++) { 4717 *cam++ = *ba++; bj++; 4718 } 4719 } 4720 } else { 4721 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4722 } 4723 4724 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4725 PetscFunctionReturn(0); 4726 } 4727 4728 #undef __FUNCT__ 4729 #define __FUNCT__ "MatGetLocalMatCondensed" 4730 /*@C 4731 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4732 4733 Not Collective 4734 4735 Input Parameters: 4736 + A - the matrix 4737 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4738 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4739 4740 Output Parameter: 4741 . 
A_loc - the local sequential matrix generated
4742
4743 Level: developer
4744
4745 @*/
4746 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4747 {
4748 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
4749 PetscErrorCode ierr;
4750 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4751 IS isrowa,iscola;
4752 Mat *aloc;
4753
4754 PetscFunctionBegin;
4755 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4756 if (!row){
4757 start = A->rmap->rstart; end = A->rmap->rend;
4758 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4759 } else {
4760 isrowa = *row;
4761 }
4762 if (!col){
4763 start = A->cmap->rstart;
4764 cmap = a->garray;
4765 nzA = a->A->cmap->n;
4766 nzB = a->B->cmap->n;
4767 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4768 ncols = 0;
4769 for (i=0; i<nzB; i++) {
4770 if (cmap[i] < start) idx[ncols++] = cmap[i];
4771 else break;
4772 }
4773 imark = i;
4774 for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4775 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4776 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4777 ierr = PetscFree(idx);CHKERRQ(ierr);
4778 } else {
4779 iscola = *col;
4780 }
4781 if (scall != MAT_INITIAL_MATRIX){
4782 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4783 aloc[0] = *A_loc;
4784 }
4785 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4786 *A_loc = aloc[0];
4787 ierr = PetscFree(aloc);CHKERRQ(ierr);
4788 if (!row){
4789 ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4790 }
4791 if (!col){
4792 ierr = ISDestroy(iscola);CHKERRQ(ierr);
4793 }
4794 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4795 PetscFunctionReturn(0);
4796 }
4797
4798 #undef __FUNCT__
4799 #define __FUNCT__ "MatGetBrowsOfAcols"
4800 /*@C
4801 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
4802
4803 Collective on Mat
4804
4805 Input Parameters:
4806 + A,B - the matrices in mpiaij format
4807 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4808 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4809
4810 Output Parameter:
4811 + rowb, colb - index sets of rows and columns of B to extract
4812 .
brstart - row index of B_seq from which the next B->rmap->n rows are taken from B's local rows
4813 - B_seq - the sequential matrix generated
4814
4815 Level: developer
4816
4817 @*/
4818 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4819 {
4820 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
4821 PetscErrorCode ierr;
4822 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4823 IS isrowb,iscolb;
4824 Mat *bseq;
4825
4826 PetscFunctionBegin;
4827 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4828 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4829 }
4830 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4831
4832 if (scall == MAT_INITIAL_MATRIX){
4833 start = A->cmap->rstart;
4834 cmap = a->garray;
4835 nzA = a->A->cmap->n;
4836 nzB = a->B->cmap->n;
4837 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4838 ncols = 0;
4839 for (i=0; i<nzB; i++) { /* row < local row index */
4840 if (cmap[i] < start) idx[ncols++] = cmap[i];
4841 else break;
4842 }
4843 imark = i;
4844 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */
4845 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4846 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4847 ierr = PetscFree(idx);CHKERRQ(ierr);
4848 *brstart = imark;
4849 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4850 } else {
4851 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4852 isrowb = *rowb; iscolb = *colb;
4853 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4854 bseq[0] = *B_seq;
4855 }
4856 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4857 *B_seq = bseq[0];
4858 ierr = PetscFree(bseq);CHKERRQ(ierr);
4859 if (!rowb){
4860 ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4861 } else {
4862 *rowb = isrowb;
4863 }
4864 if (!colb){
4865 ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4866 } else {
4867 *colb = iscolb;
4868 }
4869 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4870 PetscFunctionReturn(0);
4871 }
4872
4873 #undef __FUNCT__
4874 #define __FUNCT__ "MatGetBrowsOfAoCols"
4875 /*@C
4876 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4877 of the OFF-DIAGONAL portion of the local part of A
4878
4879 Collective on Mat
4880
4881 Input Parameters:
4882 + A,B - the matrices in mpiaij format
4883 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4884 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4885 .
startsj_r - similar to startsj for receives 4886 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4887 4888 Output Parameter: 4889 + B_oth - the sequential matrix generated 4890 4891 Level: developer 4892 4893 @*/ 4894 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 4895 { 4896 VecScatter_MPI_General *gen_to,*gen_from; 4897 PetscErrorCode ierr; 4898 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4899 Mat_SeqAIJ *b_oth; 4900 VecScatter ctx=a->Mvctx; 4901 MPI_Comm comm=((PetscObject)ctx)->comm; 4902 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4903 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4904 PetscScalar *rvalues,*svalues; 4905 MatScalar *b_otha,*bufa,*bufA; 4906 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4907 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4908 MPI_Status *sstatus,rstatus; 4909 PetscMPIInt jj; 4910 PetscInt *cols,sbs,rbs; 4911 PetscScalar *vals; 4912 4913 PetscFunctionBegin; 4914 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4915 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4916 } 4917 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4918 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4919 4920 gen_to = (VecScatter_MPI_General*)ctx->todata; 4921 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4922 rvalues = gen_from->values; /* holds the length of receiving row */ 4923 svalues = gen_to->values; /* holds the length of sending row */ 4924 nrecvs = gen_from->n; 4925 nsends = gen_to->n; 4926 4927 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4928 srow = gen_to->indices; /* local row index to be sent */ 4929 sstarts = gen_to->starts; 4930 sprocs = gen_to->procs; 4931 sstatus = gen_to->sstatus; 4932 sbs = gen_to->bs; 4933 rstarts = gen_from->starts; 4934 rprocs = gen_from->procs; 4935 rbs = gen_from->bs; 4936 4937 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4938 if (scall == MAT_INITIAL_MATRIX){ 4939 /* i-array */ 4940 /*---------*/ 4941 /* post receives */ 4942 for (i=0; i<nrecvs; i++){ 4943 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4944 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4945 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4946 } 4947 4948 /* pack the outgoing message */ 4949 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 4950 sstartsj[0] = 0; rstartsj[0] = 0; 4951 len = 0; /* total length of j or a array to be sent */ 4952 k = 0; 4953 for (i=0; i<nsends; i++){ 4954 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4955 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4956 for (j=0; j<nrows; j++) { 4957 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4958 for (l=0; l<sbs; l++){ 4959 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4960 rowlen[j*sbs+l] = ncols; 4961 len += ncols; 4962 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4963 } 4964 k++; 4965 } 4966 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4967 sstartsj[i+1] = len; /* starting point of (i+1)-th 
outgoing msg in bufj and bufa */
4968 }
4969 /* recvs and sends of i-array are completed */
4970 i = nrecvs;
4971 while (i--) {
4972 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4973 }
4974 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4975
4976 /* allocate buffers for sending j and a arrays */
4977 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4978 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4979
4980 /* create i-array of B_oth */
4981 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4982 b_othi[0] = 0;
4983 len = 0; /* total length of j or a array to be received */
4984 k = 0;
4985 for (i=0; i<nrecvs; i++){
4986 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4987 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4988 for (j=0; j<nrows; j++) {
4989 b_othi[k+1] = b_othi[k] + rowlen[j];
4990 len += rowlen[j]; k++;
4991 }
4992 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4993 }
4994
4995 /* allocate space for j and a arrays of B_oth */
4996 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4997 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4998
4999 /* j-array */
5000 /*---------*/
5001 /* post receives of j-array */
5002 for (i=0; i<nrecvs; i++){
5003 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5004 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5005 }
5006
5007 /* pack the outgoing message j-array */
5008 k = 0;
5009 for (i=0; i<nsends; i++){
5010 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5011 bufJ = bufj+sstartsj[i];
5012 for (j=0; j<nrows; j++) {
5013 row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5014 for (ll=0; ll<sbs; ll++){
5015 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5016 for (l=0; l<ncols; l++){
5017 *bufJ++ = cols[l];
5018 }
5019 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5020 }
5021 }
5022 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5023 }
5024
5025 /* recvs and sends of j-array are completed */
5026 i = nrecvs;
5027 while (i--) {
5028 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5029 }
5030 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5031 } else if (scall == MAT_REUSE_MATRIX){
5032 sstartsj = *startsj;
5033 rstartsj = *startsj_r;
5034 bufa = *bufa_ptr;
5035 b_oth = (Mat_SeqAIJ*)(*B_oth)->data;
5036 b_otha = b_oth->a;
5037 } else {
5038 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5039 }
5040
5041 /* a-array */
5042 /*---------*/
5043 /* post receives of a-array */
5044 for (i=0; i<nrecvs; i++){
5045 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5046 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5047 }
5048
5049 /* pack the outgoing message a-array */
5050 k = 0;
5051 for (i=0; i<nsends; i++){
5052 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5053 bufA = bufa+sstartsj[i];
5054 for (j=0; j<nrows; j++) {
5055 row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5056 for (ll=0; ll<sbs; ll++){
5057 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5058 for (l=0; l<ncols; l++){
5059 *bufA++ = vals[l];
5060 }
5061 ierr =
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5062 } 5063 } 5064 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5065 } 5066 /* recvs and sends of a-array are completed */ 5067 i = nrecvs; 5068 while (i--) { 5069 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5070 } 5071 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5072 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5073 5074 if (scall == MAT_INITIAL_MATRIX){ 5075 /* put together the new matrix */ 5076 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5077 5078 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5079 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5080 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5081 b_oth->free_a = PETSC_TRUE; 5082 b_oth->free_ij = PETSC_TRUE; 5083 b_oth->nonew = 0; 5084 5085 ierr = PetscFree(bufj);CHKERRQ(ierr); 5086 if (!startsj || !bufa_ptr){ 5087 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5088 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5089 } else { 5090 *startsj = sstartsj; 5091 *startsj_r = rstartsj; 5092 *bufa_ptr = bufa; 5093 } 5094 } 5095 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5096 PetscFunctionReturn(0); 5097 } 5098 5099 #undef __FUNCT__ 5100 #define __FUNCT__ "MatGetCommunicationStructs" 5101 /*@C 5102 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5103 5104 Not Collective 5105 5106 Input Parameters: 5107 . A - The matrix in mpiaij format 5108 5109 Output Parameter: 5110 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5111 . 
colmap - A map from global column index to local index into lvec 5112 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5113 5114 Level: developer 5115 5116 @*/ 5117 #if defined (PETSC_USE_CTABLE) 5118 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5119 #else 5120 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5121 #endif 5122 { 5123 Mat_MPIAIJ *a; 5124 5125 PetscFunctionBegin; 5126 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5127 PetscValidPointer(lvec, 2); 5128 PetscValidPointer(colmap, 3); 5129 PetscValidPointer(multScatter, 4); 5130 a = (Mat_MPIAIJ *) A->data; 5131 if (lvec) *lvec = a->lvec; 5132 if (colmap) *colmap = a->colmap; 5133 if (multScatter) *multScatter = a->Mvctx; 5134 PetscFunctionReturn(0); 5135 } 5136 5137 EXTERN_C_BEGIN 5138 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*); 5139 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*); 5140 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 5141 EXTERN_C_END 5142 5143 #undef __FUNCT__ 5144 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5145 /* 5146 Computes (B'*A')' since computing B*A directly is untenable 5147 5148 n p p 5149 ( ) ( ) ( ) 5150 m ( A ) * n ( B ) = m ( C ) 5151 ( ) ( ) ( ) 5152 5153 */ 5154 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5155 { 5156 PetscErrorCode ierr; 5157 Mat At,Bt,Ct; 5158 5159 PetscFunctionBegin; 5160 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5161 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5162 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5163 ierr = MatDestroy(At);CHKERRQ(ierr); 5164 ierr = MatDestroy(Bt);CHKERRQ(ierr); 5165 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5166 ierr = MatDestroy(Ct);CHKERRQ(ierr); 5167 PetscFunctionReturn(0); 5168 } 5169 5170 #undef __FUNCT__ 5171 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5172 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5173 { 5174 PetscErrorCode ierr; 5175 PetscInt m=A->rmap->n,n=B->cmap->n; 5176 Mat Cmat; 5177 5178 PetscFunctionBegin; 5179 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5180 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5181 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5182 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5183 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5184 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5185 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5186 *C = Cmat; 5187 PetscFunctionReturn(0); 5188 } 5189 5190 /* ----------------------------------------------------------------*/ 5191 #undef __FUNCT__ 5192 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5193 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5194 { 5195 PetscErrorCode ierr; 5196 5197 PetscFunctionBegin; 5198 if (scall == MAT_INITIAL_MATRIX){ 5199 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5200 } 5201 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5202 PetscFunctionReturn(0); 5203 } 5204 
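/*
   Usage sketch (illustrative only, not part of the routines in this file; it assumes an
   already assembled MPIDENSE matrix A and an MPIAIJ matrix B whose layouts satisfy
   A->cmap->n == B->rmap->n):

      Mat C;
      ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
         (use C here)
      ierr = MatDestroy(C);CHKERRQ(ierr);

   For this type pair MatMatMult() is intended to reach MatMatMult_MPIDense_MPIAIJ() above
   through the composed function "MatMatMult_mpidense_mpiaij_C" registered in
   MatCreate_MPIAIJ() below; that routine forms C = (B' * A')' with explicit transposes
   because no direct dense-times-AIJ kernel is provided here.
*/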
5205 EXTERN_C_BEGIN 5206 #if defined(PETSC_HAVE_MUMPS) 5207 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5208 #endif 5209 #if defined(PETSC_HAVE_PASTIX) 5210 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5211 #endif 5212 #if defined(PETSC_HAVE_SUPERLU_DIST) 5213 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5214 #endif 5215 #if defined(PETSC_HAVE_SPOOLES) 5216 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5217 #endif 5218 EXTERN_C_END 5219 5220 /*MC 5221 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5222 5223 Options Database Keys: 5224 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5225 5226 Level: beginner 5227 5228 .seealso: MatCreateMPIAIJ() 5229 M*/ 5230 5231 EXTERN_C_BEGIN 5232 #undef __FUNCT__ 5233 #define __FUNCT__ "MatCreate_MPIAIJ" 5234 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 5235 { 5236 Mat_MPIAIJ *b; 5237 PetscErrorCode ierr; 5238 PetscMPIInt size; 5239 5240 PetscFunctionBegin; 5241 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5242 5243 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5244 B->data = (void*)b; 5245 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5246 B->rmap->bs = 1; 5247 B->assembled = PETSC_FALSE; 5248 B->mapping = 0; 5249 5250 B->insertmode = NOT_SET_VALUES; 5251 b->size = size; 5252 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5253 5254 /* build cache for off array entries formed */ 5255 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5256 b->donotstash = PETSC_FALSE; 5257 b->colmap = 0; 5258 b->garray = 0; 5259 b->roworiented = PETSC_TRUE; 5260 5261 /* stuff used for matrix vector multiply */ 5262 b->lvec = PETSC_NULL; 5263 b->Mvctx = PETSC_NULL; 5264 5265 /* stuff for MatGetRow() */ 5266 b->rowindices = 0; 5267 b->rowvalues = 0; 5268 b->getrowactive = PETSC_FALSE; 5269 5270 #if defined(PETSC_HAVE_SPOOLES) 5271 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5272 "MatGetFactor_mpiaij_spooles", 5273 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5274 #endif 5275 #if defined(PETSC_HAVE_MUMPS) 5276 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5277 "MatGetFactor_aij_mumps", 5278 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5279 #endif 5280 #if defined(PETSC_HAVE_PASTIX) 5281 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5282 "MatGetFactor_mpiaij_pastix", 5283 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5284 #endif 5285 #if defined(PETSC_HAVE_SUPERLU_DIST) 5286 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5287 "MatGetFactor_mpiaij_superlu_dist", 5288 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5289 #endif 5290 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5291 "MatStoreValues_MPIAIJ", 5292 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5293 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5294 "MatRetrieveValues_MPIAIJ", 5295 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5296 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5297 "MatGetDiagonalBlock_MPIAIJ", 5298 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5299 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5300 "MatIsTranspose_MPIAIJ", 5301 
MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5302 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5303 "MatMPIAIJSetPreallocation_MPIAIJ",
5304 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5305 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5306 "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5307 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5308 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5309 "MatDiagonalScaleLocal_MPIAIJ",
5310 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5311 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
5312 "MatConvert_MPIAIJ_MPICSRPERM",
5313 MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
5314 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
5315 "MatConvert_MPIAIJ_MPICRL",
5316 MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
5317 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5318 "MatConvert_MPIAIJ_MPISBAIJ",
5319 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5320 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5321 "MatMatMult_MPIDense_MPIAIJ",
5322 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5323 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5324 "MatMatMultSymbolic_MPIDense_MPIAIJ",
5325 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5326 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5327 "MatMatMultNumeric_MPIDense_MPIAIJ",
5328 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5329 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5330 PetscFunctionReturn(0);
5331 }
5332 EXTERN_C_END
5333
5334 #undef __FUNCT__
5335 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5336 /*@
5337 MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5338 and "off-diagonal" part of the matrix in CSR format.
5339
5340 Collective on MPI_Comm
5341
5342 Input Parameters:
5343 + comm - MPI communicator
5344 . m - number of local rows (Cannot be PETSC_DECIDE)
5345 . n - This value should be the same as the local size used in creating the
5346 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5347 calculated if N is given). For square matrices n is almost always m.
5348 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5349 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5350 . i - row indices for "diagonal" portion of matrix
5351 . j - column indices
5352 . a - matrix values
5353 . oi - row indices for "off-diagonal" portion of matrix
5354 . oj - column indices
5355 - oa - matrix values
5356
5357 Output Parameter:
5358 . mat - the matrix
5359
5360 Level: advanced
5361
5362 Notes:
5363 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5364
5365 The i and j indices are 0 based
5366
5367 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5368
5369 This sets local rows and cannot be used to set off-processor values.
5370
5371 You cannot later use MatSetValues() to change values in this matrix.
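   Example usage (a minimal sketch, not taken from the PETSc examples; it assumes two
   processes, each owning two rows and two columns of the 4x4 matrix

        1 0 | 2 0
        0 3 | 0 4
        ----+----
        5 0 | 6 0
        0 7 | 0 8

   and, following the MatCreateSeqAIJWithArrays() calls made in the body of this routine,
   that the "diagonal" column indices j are local to this process's columns while the
   "off-diagonal" column indices oj are global). On rank 0 one could pass

      PetscInt    i[]  = {0,1,2},  j[]  = {0,1};
      PetscScalar a[]  = {1.0,3.0};
      PetscInt    oi[] = {0,1,2},  oj[] = {2,3};
      PetscScalar oa[] = {2.0,4.0};
      Mat         mat;

      ierr = MatCreateMPIAIJWithSplitArrays(comm,2,2,4,4,i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);

   with the analogous arrays on rank 1. Since the arrays are not copied, they must stay
   valid for the lifetime of mat.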
5372 5373 .keywords: matrix, aij, compressed row, sparse, parallel 5374 5375 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5376 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5377 @*/ 5378 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5379 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5380 { 5381 PetscErrorCode ierr; 5382 Mat_MPIAIJ *maij; 5383 5384 PetscFunctionBegin; 5385 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5386 if (i[0]) { 5387 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5388 } 5389 if (oi[0]) { 5390 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5391 } 5392 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5393 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5394 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5395 maij = (Mat_MPIAIJ*) (*mat)->data; 5396 maij->donotstash = PETSC_TRUE; 5397 (*mat)->preallocated = PETSC_TRUE; 5398 5399 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5400 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5401 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5402 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5403 5404 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5405 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5406 5407 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5408 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5409 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5410 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5411 5412 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5413 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5414 PetscFunctionReturn(0); 5415 } 5416 5417 /* 5418 Special version for direct calls from Fortran 5419 */ 5420 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5421 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5422 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5423 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5424 #endif 5425 5426 /* Change these macros so can be used in void function */ 5427 #undef CHKERRQ 5428 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5429 #undef SETERRQ2 5430 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5431 #undef SETERRQ 5432 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5433 5434 EXTERN_C_BEGIN 5435 #undef __FUNCT__ 5436 #define __FUNCT__ "matsetvaluesmpiaij_" 5437 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5438 { 5439 Mat mat = *mmat; 5440 PetscInt m = *mm, n = *mn; 5441 InsertMode addv = *maddv; 5442 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5443 PetscScalar value; 5444 PetscErrorCode ierr; 5445 5446 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5447 if (mat->insertmode == NOT_SET_VALUES) { 5448 mat->insertmode = addv; 5449 } 5450 #if defined(PETSC_USE_DEBUG) 5451 else if (mat->insertmode != addv) { 5452 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5453 } 5454 #endif 5455 { 5456 PetscInt i,j,rstart = 
mat->rmap->rstart,rend = mat->rmap->rend; 5457 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5458 PetscTruth roworiented = aij->roworiented; 5459 5460 /* Some Variables required in the macro */ 5461 Mat A = aij->A; 5462 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5463 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5464 MatScalar *aa = a->a; 5465 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5466 Mat B = aij->B; 5467 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5468 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5469 MatScalar *ba = b->a; 5470 5471 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5472 PetscInt nonew = a->nonew; 5473 MatScalar *ap1,*ap2; 5474 5475 PetscFunctionBegin; 5476 for (i=0; i<m; i++) { 5477 if (im[i] < 0) continue; 5478 #if defined(PETSC_USE_DEBUG) 5479 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5480 #endif 5481 if (im[i] >= rstart && im[i] < rend) { 5482 row = im[i] - rstart; 5483 lastcol1 = -1; 5484 rp1 = aj + ai[row]; 5485 ap1 = aa + ai[row]; 5486 rmax1 = aimax[row]; 5487 nrow1 = ailen[row]; 5488 low1 = 0; 5489 high1 = nrow1; 5490 lastcol2 = -1; 5491 rp2 = bj + bi[row]; 5492 ap2 = ba + bi[row]; 5493 rmax2 = bimax[row]; 5494 nrow2 = bilen[row]; 5495 low2 = 0; 5496 high2 = nrow2; 5497 5498 for (j=0; j<n; j++) { 5499 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5500 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5501 if (in[j] >= cstart && in[j] < cend){ 5502 col = in[j] - cstart; 5503 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5504 } else if (in[j] < 0) continue; 5505 #if defined(PETSC_USE_DEBUG) 5506 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5507 #endif 5508 else { 5509 if (mat->was_assembled) { 5510 if (!aij->colmap) { 5511 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5512 } 5513 #if defined (PETSC_USE_CTABLE) 5514 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5515 col--; 5516 #else 5517 col = aij->colmap[in[j]] - 1; 5518 #endif 5519 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5520 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5521 col = in[j]; 5522 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5523 B = aij->B; 5524 b = (Mat_SeqAIJ*)B->data; 5525 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5526 rp2 = bj + bi[row]; 5527 ap2 = ba + bi[row]; 5528 rmax2 = bimax[row]; 5529 nrow2 = bilen[row]; 5530 low2 = 0; 5531 high2 = nrow2; 5532 bm = aij->B->rmap->n; 5533 ba = b->a; 5534 } 5535 } else col = in[j]; 5536 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5537 } 5538 } 5539 } else { 5540 if (!aij->donotstash) { 5541 if (roworiented) { 5542 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5543 } else { 5544 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5545 } 5546 } 5547 } 5548 }} 5549 PetscFunctionReturnVoid(); 5550 } 5551 EXTERN_C_END 5552 5553
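/*
   Note on the routine above: matsetvaluesmpiaij_() is the direct Fortran entry point for
   inserting values into an MPIAIJ matrix; it inlines the work normally reached through the
   function-table dispatch of MatSetValues(). A C caller would simply use the standard path,
   for example (sketch only):

      ierr = MatSetValues(mat,m,im,n,in,v,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   Off-process entries set this way are stashed and communicated during assembly, just as in
   the donotstash branch at the end of matsetvaluesmpiaij_().
*/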