#define PETSCMAT_DLL

#include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "petscblaslapack.h"

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
   Distributes a SeqAIJ matrix across a set of processes. Code stolen from
   MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

   Only for square matrices

   Collective on comm.

   Input Parameters:
.    comm  - communicator the distributed matrix lives on
.    gmat  - on rank 0, the full sequential AIJ matrix to scatter (only examined on rank 0)
.    m     - number of local rows this process is to own
.    reuse - MAT_INITIAL_MATRIX to build a new parallel matrix (structure and values), or
             MAT_REUSE_MATRIX to only refresh the numerical values of *inmat from rank 0

   Output Parameter:
.    inmat - the distributed MPIAIJ matrix

   Notes: rank 0 sends row lengths, column indices and values with plain MPI_Send;
   every other rank posts matching blocking MPI_Recv's, so the send/recv ordering
   below must not be rearranged.
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscTruth     aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    /* only rank 0 holds the global matrix, so only it can type-check it */
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    /* gather every process's local row count, then prefix-sum into ownership ranges */
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          /* ld[i] counts entries strictly left of the diagonal block; olens[i] counts all off-diagonal-block entries */
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      /* rank 0 inserts straight out of the global matrix's own arrays below */
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation: dlens temporarily becomes the diagonal-block count */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    /* restore dlens to full row lengths for the insertion loop */
    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    /* stash the left-of-diagonal counts; the MAT_REUSE_MATRIX path below depends on them */
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat:
       each received row is laid out [left-of-diag B | A | right-of-diag B], with ld[]
       giving the size of the left-of-diag piece */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      /* right-of-diag remainder of row i-1 plus left-of-diag head of row i */
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      /* trailing right-of-diag piece of the last local row */
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}

/*
    Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array but is fast to access.
 */
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;   /* number of off-diagonal (B) columns actually present locally */

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  /* hash-table variant: maps global column+1 -> local B column+1 (the +1 keeps 0 free as "absent") */
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  /* dense variant: an array of length global-N; colmap[g]==0 means global column g is absent,
     otherwise colmap[g]-1 is the local B column */
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

/*
   Inserts (or adds, per addv) value at (row,col) of the diagonal block A.
   Relies on caller-scope locals set up by MatSetValues_MPIAIJ: rp1/ap1 (row's
   column/value arrays), nrow1/rmax1 (used/allocated lengths), low1/high1/lastcol1
   (search-window memo between calls), plus a, aa, ai, aj, aimax, ailen, am, nonew,
   ignorezeroentries and the scratch variables t, _i, ii, N.
   Binary-searches the row for col; if absent it may realloc and shift entries up.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value;   \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col;  \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

/*
   Twin of MatSetValues_SeqAIJ_A_Private for the off-diagonal block B: uses the
   parallel caller-scope locals rp2/ap2, nrow2/rmax2, low2/high2/lastcol2, b, ba,
   bi, bj, bimax, bilen, bm.  Same search/insert/realloc behavior.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
/*
   Overwrites the values of one locally-owned row from a dense-in-row array v,
   whose entries are ordered by global column.  Splits v into the pieces stored
   in B (left of the diagonal block), A, and B again (right of the diagonal block).
   row is a GLOBAL row index; v must supply a value for every stored nonzero of the row.
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
/*
   MatSetValues() implementation for MPIAIJ: locally-owned rows are inserted
   directly into A (diagonal block) or B (off-diagonal block) via the
   *_Private macros above; rows owned elsewhere are queued in the stash for
   communication at assembly time (unless donotstash is set).
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat            A = aij->A;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar      *aa = a->a;
  PetscTruth     ignorezeroentries = a->ignorezeroentries;
  Mat            B = aij->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar      *ba = b->a;

  PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt       nonew = a->nonew;
  MatScalar      *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;   /* negative row indices are silently skipped */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: prime the macro state for both A and B */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              /* column not in B's current nonzero pattern: fall back to a
                 disassembled (global-column) B so the new entry can be placed */
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      /* off-process row: stash it for MatAssemblyBegin/End to communicate */
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
/*
   MatGetValues() implementation for MPIAIJ.  Only locally owned rows may be
   queried; entries outside B's stored pattern read back as 0.0.
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* column absent from B's pattern (or a stale colmap hit): value is zero */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
/*
   Starts assembly: verifies all ranks agree on INSERT vs ADD mode, then begins
   scattering the stashed off-process entries toward their owners.
*/
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
/*
   Completes assembly: drains the stash into local storage via MatSetValues,
   assembles the A and B blocks, coordinates a collective disassembly if any
   rank had to disassemble, and (on first final assembly) builds the scatter
   used by MatMult.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ *)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */

  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
/* Zeroes all stored values (pattern kept) in both the A and B blocks. */
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
/*
   Zeroes the given GLOBAL rows (optionally setting the diagonal to diag).
   Rows may be specified on any rank; a hand-rolled Isend/Irecv rendezvous
   routes each row index to its owning rank before the local zeroing is done.
*/
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap->range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  /* nprocs[2*j] = #rows destined for rank j; nprocs[2*j+1] = 1 iff any row goes there */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
  j = 0;
  for (i=0; i<N; i++) {
    /* restart the owner scan only when the input stops being sorted */
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;   /* convert to local row index */
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
/*
   yy = A*xx.  Overlaps the scatter of off-process x entries with the local
   (diagonal-block) multiply, then adds the off-diagonal contribution.
*/
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
/* Applies only the local diagonal block: xx = A_diag * bb (no communication). */
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
/* zz = yy + A*xx, with the same scatter/compute overlap as MatMult_MPIAIJ. */
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
/*
   yy = A^T*xx.  The off-diagonal transpose product goes into lvec and is
   scattered back in reverse; the two branches differ only in operation
   ordering depending on whether the scatter's begin/end are merged.
*/
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line, */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
/*
   Tests whether Bmat equals Amat^T to tolerance tol.  First compares the
   (cheap) diagonal blocks; only if those match does it extract and compare
   the off-diagonal submatrices.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);
  /* NOTE(review): notme does not appear to be freed on this path — possible
     memory leak; confirm against the upstream version */

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
/* zz = yy + A^T*xx, overlapping the reverse scatter with the local transpose multiply. */
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
/* Scales every stored entry by aa (both diagonal and off-diagonal blocks). */
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
/*
   Destroys all MPIAIJ-private storage (blocks, colmap, scatter, work vectors)
   and unregisters the composed type-specific methods.
*/
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  /* clear the composed methods registered at creation time */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr =
PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 943 /* get largest number of rows any processor has */ 944 rlen = mat->rmap->n; 945 range = mat->rmap->range; 946 for (i=1; i<size; i++) { 947 rlen = PetscMax(rlen,range[i+1] - range[i]); 948 } 949 } else { 950 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 951 rlen = mat->rmap->n; 952 } 953 954 /* load up the local row counts */ 955 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 956 for (i=0; i<mat->rmap->n; i++) { 957 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 958 } 959 960 /* store the row lengths to the file */ 961 if (!rank) { 962 MPI_Status status; 963 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 964 for (i=1; i<size; i++) { 965 rlen = range[i+1] - range[i]; 966 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 967 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 968 } 969 } else { 970 ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 971 } 972 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 973 974 /* load up the local column indices */ 975 nzmax = nz; /* )th processor needs space a largest processor needs */ 976 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 977 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 978 cnt = 0; 979 for (i=0; i<mat->rmap->n; i++) { 980 for (j=B->i[i]; j<B->i[i+1]; j++) { 981 if ( (col = garray[B->j[j]]) > cstart) break; 982 column_indices[cnt++] = col; 983 } 984 for (k=A->i[i]; k<A->i[i+1]; k++) { 985 column_indices[cnt++] = A->j[k] + cstart; 986 } 987 for (; j<B->i[i+1]; j++) { 988 column_indices[cnt++] = garray[B->j[j]]; 989 } 990 } 991 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = 
%D",cnt,A->nz+B->nz); 992 993 /* store the column indices to the file */ 994 if (!rank) { 995 MPI_Status status; 996 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 997 for (i=1; i<size; i++) { 998 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 999 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1000 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1001 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1002 } 1003 } else { 1004 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1005 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1006 } 1007 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1008 1009 /* load up the local column values */ 1010 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1011 cnt = 0; 1012 for (i=0; i<mat->rmap->n; i++) { 1013 for (j=B->i[i]; j<B->i[i+1]; j++) { 1014 if ( garray[B->j[j]] > cstart) break; 1015 column_values[cnt++] = B->a[j]; 1016 } 1017 for (k=A->i[i]; k<A->i[i+1]; k++) { 1018 column_values[cnt++] = A->a[k]; 1019 } 1020 for (; j<B->i[i+1]; j++) { 1021 column_values[cnt++] = B->a[j]; 1022 } 1023 } 1024 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1025 1026 /* store the column values to the file */ 1027 if (!rank) { 1028 MPI_Status status; 1029 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1030 for (i=1; i<size; i++) { 1031 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1032 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1033 ierr = 
MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1034 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1035 } 1036 } else { 1037 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1038 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1039 } 1040 ierr = PetscFree(column_values);CHKERRQ(ierr); 1041 PetscFunctionReturn(0); 1042 } 1043 1044 #undef __FUNCT__ 1045 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1046 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1047 { 1048 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1049 PetscErrorCode ierr; 1050 PetscMPIInt rank = aij->rank,size = aij->size; 1051 PetscTruth isdraw,iascii,isbinary; 1052 PetscViewer sviewer; 1053 PetscViewerFormat format; 1054 1055 PetscFunctionBegin; 1056 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1057 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1058 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1059 if (iascii) { 1060 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1061 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1062 MatInfo info; 1063 PetscTruth inodes; 1064 1065 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1066 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1067 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1068 if (!inodes) { 1069 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1070 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1071 } else { 1072 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node 
routines\n", 1073 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1074 } 1075 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1076 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1077 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1078 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1079 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1080 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1081 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1082 PetscFunctionReturn(0); 1083 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1084 PetscInt inodecount,inodelimit,*inodes; 1085 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1086 if (inodes) { 1087 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1088 } else { 1089 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1090 } 1091 PetscFunctionReturn(0); 1092 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1093 PetscFunctionReturn(0); 1094 } 1095 } else if (isbinary) { 1096 if (size == 1) { 1097 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1098 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1099 } else { 1100 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1101 } 1102 PetscFunctionReturn(0); 1103 } else if (isdraw) { 1104 PetscDraw draw; 1105 PetscTruth isnull; 1106 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1107 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1108 } 1109 1110 if (size == 1) { 1111 ierr = 
PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1112 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1113 } else { 1114 /* assemble the entire matrix onto first processor. */ 1115 Mat A; 1116 Mat_SeqAIJ *Aloc; 1117 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1118 MatScalar *a; 1119 1120 if (mat->rmap->N > 1024) { 1121 PetscTruth flg = PETSC_FALSE; 1122 1123 ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1124 if (!flg) { 1125 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1126 } 1127 } 1128 1129 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1130 if (!rank) { 1131 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1132 } else { 1133 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1134 } 1135 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1136 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1137 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1138 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1139 1140 /* copy over the A part */ 1141 Aloc = (Mat_SeqAIJ*)aij->A->data; 1142 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1143 row = mat->rmap->rstart; 1144 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1145 for (i=0; i<m; i++) { 1146 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1147 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1148 } 1149 aj = Aloc->j; 1150 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1151 1152 /* copy over the B part */ 1153 Aloc = (Mat_SeqAIJ*)aij->B->data; 1154 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1155 row = mat->rmap->rstart; 1156 ierr = 
PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1157 ct = cols; 1158 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1159 for (i=0; i<m; i++) { 1160 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1161 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1162 } 1163 ierr = PetscFree(ct);CHKERRQ(ierr); 1164 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1165 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1166 /* 1167 Everyone has to call to draw the matrix since the graphics waits are 1168 synchronized across all processors that share the PetscDraw object 1169 */ 1170 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1171 if (!rank) { 1172 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1173 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1174 } 1175 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1176 ierr = MatDestroy(A);CHKERRQ(ierr); 1177 } 1178 PetscFunctionReturn(0); 1179 } 1180 1181 #undef __FUNCT__ 1182 #define __FUNCT__ "MatView_MPIAIJ" 1183 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1184 { 1185 PetscErrorCode ierr; 1186 PetscTruth iascii,isdraw,issocket,isbinary; 1187 1188 PetscFunctionBegin; 1189 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1190 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1191 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1192 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1193 if (iascii || isdraw || isbinary || issocket) { 1194 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1195 } else { 1196 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1197 } 1198 PetscFunctionReturn(0); 1199 } 
1200 1201 #undef __FUNCT__ 1202 #define __FUNCT__ "MatSOR_MPIAIJ" 1203 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1204 { 1205 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1206 PetscErrorCode ierr; 1207 Vec bb1 = 0; 1208 PetscTruth hasop; 1209 1210 PetscFunctionBegin; 1211 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1212 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1213 } 1214 1215 if (flag == SOR_APPLY_UPPER) { 1216 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1217 PetscFunctionReturn(0); 1218 } 1219 1220 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1221 if (flag & SOR_ZERO_INITIAL_GUESS) { 1222 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1223 its--; 1224 } 1225 1226 while (its--) { 1227 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1228 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1229 1230 /* update rhs: bb1 = bb - B*x */ 1231 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1232 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1233 1234 /* local sweep */ 1235 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1236 } 1237 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1238 if (flag & SOR_ZERO_INITIAL_GUESS) { 1239 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1240 its--; 1241 } 1242 while (its--) { 1243 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1244 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1245 1246 /* update rhs: bb1 = bb - B*x */ 1247 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1248 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1249 1250 
/* local sweep */ 1251 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1252 } 1253 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1254 if (flag & SOR_ZERO_INITIAL_GUESS) { 1255 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1256 its--; 1257 } 1258 while (its--) { 1259 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1260 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1261 1262 /* update rhs: bb1 = bb - B*x */ 1263 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1264 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1265 1266 /* local sweep */ 1267 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1268 } 1269 } else if (flag & SOR_EISENSTAT) { 1270 Vec xx1; 1271 1272 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1273 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1274 1275 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1276 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1277 if (!mat->diag) { 1278 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1279 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1280 } 1281 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1282 if (hasop) { 1283 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1284 } else { 1285 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1286 } 1287 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1288 1289 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1290 1291 /* local sweep */ 1292 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | 
SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1293 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1294 ierr = VecDestroy(xx1);CHKERRQ(ierr); 1295 } else { 1296 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1297 } 1298 1299 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 1300 PetscFunctionReturn(0); 1301 } 1302 1303 #undef __FUNCT__ 1304 #define __FUNCT__ "MatPermute_MPIAIJ" 1305 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1306 { 1307 MPI_Comm comm,pcomm; 1308 PetscInt first,local_size,nrows; 1309 const PetscInt *rows; 1310 PetscMPIInt size; 1311 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1312 PetscErrorCode ierr; 1313 1314 PetscFunctionBegin; 1315 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1316 /* make a collective version of 'rowp' */ 1317 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1318 if (pcomm==comm) { 1319 crowp = rowp; 1320 } else { 1321 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1322 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1323 ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr); 1324 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1325 } 1326 /* collect the global row permutation and invert it */ 1327 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1328 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1329 if (pcomm!=comm) { 1330 ierr = ISDestroy(crowp);CHKERRQ(ierr); 1331 } 1332 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1333 /* get the local target indices */ 1334 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1335 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 1336 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1337 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr); 1338 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1339 ierr = ISDestroy(irowp);CHKERRQ(ierr); 1340 /* the column permutation is so much easier; 1341 make a local version of 
'colp' and invert it */ 1342 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 1343 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 1344 if (size==1) { 1345 lcolp = colp; 1346 } else { 1347 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 1348 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 1349 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr); 1350 } 1351 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1352 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1353 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 1354 if (size>1) { 1355 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 1356 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 1357 } 1358 /* now we just get the submatrix */ 1359 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1360 /* clean up */ 1361 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 1362 ierr = ISDestroy(icolp);CHKERRQ(ierr); 1363 PetscFunctionReturn(0); 1364 } 1365 1366 #undef __FUNCT__ 1367 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1368 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1369 { 1370 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1371 Mat A = mat->A,B = mat->B; 1372 PetscErrorCode ierr; 1373 PetscReal isend[5],irecv[5]; 1374 1375 PetscFunctionBegin; 1376 info->block_size = 1.0; 1377 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1378 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1379 isend[3] = info->memory; isend[4] = info->mallocs; 1380 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1381 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1382 isend[3] += info->memory; isend[4] += info->mallocs; 1383 if (flag == MAT_LOCAL) { 1384 info->nz_used = isend[0]; 1385 info->nz_allocated = isend[1]; 1386 info->nz_unneeded = isend[2]; 1387 info->memory = isend[3]; 1388 info->mallocs = isend[4]; 1389 } else if (flag == MAT_GLOBAL_MAX) { 1390 ierr = 
MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1391 info->nz_used = irecv[0]; 1392 info->nz_allocated = irecv[1]; 1393 info->nz_unneeded = irecv[2]; 1394 info->memory = irecv[3]; 1395 info->mallocs = irecv[4]; 1396 } else if (flag == MAT_GLOBAL_SUM) { 1397 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1398 info->nz_used = irecv[0]; 1399 info->nz_allocated = irecv[1]; 1400 info->nz_unneeded = irecv[2]; 1401 info->memory = irecv[3]; 1402 info->mallocs = irecv[4]; 1403 } 1404 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1405 info->fill_ratio_needed = 0; 1406 info->factor_mallocs = 0; 1407 1408 PetscFunctionReturn(0); 1409 } 1410 1411 #undef __FUNCT__ 1412 #define __FUNCT__ "MatSetOption_MPIAIJ" 1413 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg) 1414 { 1415 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1416 PetscErrorCode ierr; 1417 1418 PetscFunctionBegin; 1419 switch (op) { 1420 case MAT_NEW_NONZERO_LOCATIONS: 1421 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1422 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1423 case MAT_KEEP_NONZERO_PATTERN: 1424 case MAT_NEW_NONZERO_LOCATION_ERR: 1425 case MAT_USE_INODES: 1426 case MAT_IGNORE_ZERO_ENTRIES: 1427 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1428 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1429 break; 1430 case MAT_ROW_ORIENTED: 1431 a->roworiented = flg; 1432 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1433 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1434 break; 1435 case MAT_NEW_DIAGONALS: 1436 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1437 break; 1438 case MAT_IGNORE_OFF_PROC_ENTRIES: 1439 a->donotstash = PETSC_TRUE; 1440 break; 1441 case MAT_SYMMETRIC: 1442 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1443 break; 1444 case MAT_STRUCTURALLY_SYMMETRIC: 1445 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1446 break; 1447 case MAT_HERMITIAN: 1448 ierr = 
MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1449 break; 1450 case MAT_SYMMETRY_ETERNAL: 1451 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1452 break; 1453 default: 1454 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1455 } 1456 PetscFunctionReturn(0); 1457 } 1458 1459 #undef __FUNCT__ 1460 #define __FUNCT__ "MatGetRow_MPIAIJ" 1461 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1462 { 1463 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1464 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1465 PetscErrorCode ierr; 1466 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1467 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1468 PetscInt *cmap,*idx_p; 1469 1470 PetscFunctionBegin; 1471 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1472 mat->getrowactive = PETSC_TRUE; 1473 1474 if (!mat->rowvalues && (idx || v)) { 1475 /* 1476 allocate enough space to hold information from the longest row. 
1477 */ 1478 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1479 PetscInt max = 1,tmp; 1480 for (i=0; i<matin->rmap->n; i++) { 1481 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1482 if (max < tmp) { max = tmp; } 1483 } 1484 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1485 } 1486 1487 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1488 lrow = row - rstart; 1489 1490 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1491 if (!v) {pvA = 0; pvB = 0;} 1492 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1493 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1494 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1495 nztot = nzA + nzB; 1496 1497 cmap = mat->garray; 1498 if (v || idx) { 1499 if (nztot) { 1500 /* Sort by increasing column numbers, assuming A and B already sorted */ 1501 PetscInt imark = -1; 1502 if (v) { 1503 *v = v_p = mat->rowvalues; 1504 for (i=0; i<nzB; i++) { 1505 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1506 else break; 1507 } 1508 imark = i; 1509 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1510 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1511 } 1512 if (idx) { 1513 *idx = idx_p = mat->rowindices; 1514 if (imark > -1) { 1515 for (i=0; i<imark; i++) { 1516 idx_p[i] = cmap[cworkB[i]]; 1517 } 1518 } else { 1519 for (i=0; i<nzB; i++) { 1520 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1521 else break; 1522 } 1523 imark = i; 1524 } 1525 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1526 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1527 } 1528 } else { 1529 if (idx) *idx = 0; 1530 if (v) *v = 0; 1531 } 1532 } 1533 *nz = nztot; 1534 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1535 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1536 PetscFunctionReturn(0); 
1537 } 1538 1539 #undef __FUNCT__ 1540 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1541 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1542 { 1543 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1544 1545 PetscFunctionBegin; 1546 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1547 aij->getrowactive = PETSC_FALSE; 1548 PetscFunctionReturn(0); 1549 } 1550 1551 #undef __FUNCT__ 1552 #define __FUNCT__ "MatNorm_MPIAIJ" 1553 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1554 { 1555 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1556 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1557 PetscErrorCode ierr; 1558 PetscInt i,j,cstart = mat->cmap->rstart; 1559 PetscReal sum = 0.0; 1560 MatScalar *v; 1561 1562 PetscFunctionBegin; 1563 if (aij->size == 1) { 1564 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1565 } else { 1566 if (type == NORM_FROBENIUS) { 1567 v = amat->a; 1568 for (i=0; i<amat->nz; i++) { 1569 #if defined(PETSC_USE_COMPLEX) 1570 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1571 #else 1572 sum += (*v)*(*v); v++; 1573 #endif 1574 } 1575 v = bmat->a; 1576 for (i=0; i<bmat->nz; i++) { 1577 #if defined(PETSC_USE_COMPLEX) 1578 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1579 #else 1580 sum += (*v)*(*v); v++; 1581 #endif 1582 } 1583 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1584 *norm = sqrt(*norm); 1585 } else if (type == NORM_1) { /* max column norm */ 1586 PetscReal *tmp,*tmp2; 1587 PetscInt *jj,*garray = aij->garray; 1588 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1589 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1590 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1591 *norm = 0.0; 1592 v = amat->a; jj = amat->j; 1593 for (j=0; j<amat->nz; j++) { 1594 tmp[cstart + 
*jj++ ] += PetscAbsScalar(*v); v++; 1595 } 1596 v = bmat->a; jj = bmat->j; 1597 for (j=0; j<bmat->nz; j++) { 1598 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1599 } 1600 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1601 for (j=0; j<mat->cmap->N; j++) { 1602 if (tmp2[j] > *norm) *norm = tmp2[j]; 1603 } 1604 ierr = PetscFree(tmp);CHKERRQ(ierr); 1605 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1606 } else if (type == NORM_INFINITY) { /* max row norm */ 1607 PetscReal ntemp = 0.0; 1608 for (j=0; j<aij->A->rmap->n; j++) { 1609 v = amat->a + amat->i[j]; 1610 sum = 0.0; 1611 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1612 sum += PetscAbsScalar(*v); v++; 1613 } 1614 v = bmat->a + bmat->i[j]; 1615 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1616 sum += PetscAbsScalar(*v); v++; 1617 } 1618 if (sum > ntemp) ntemp = sum; 1619 } 1620 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1621 } else { 1622 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 1623 } 1624 } 1625 PetscFunctionReturn(0); 1626 } 1627 1628 #undef __FUNCT__ 1629 #define __FUNCT__ "MatTranspose_MPIAIJ" 1630 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1631 { 1632 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1633 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1634 PetscErrorCode ierr; 1635 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1636 PetscInt cstart=A->cmap->rstart,ncol; 1637 Mat B; 1638 MatScalar *array; 1639 1640 PetscFunctionBegin; 1641 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1642 1643 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1644 ai = Aloc->i; aj = Aloc->j; 1645 bi = Bloc->i; bj = Bloc->j; 1646 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1647 /* compute d_nnz for 
preallocation; o_nnz is approximated by d_nnz to avoid communication */
    ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
    ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++){
      d_nnz[aj[i]] ++;
      aj[i] += cstart; /* global col index to be used by MatSetValues() */
    }

    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    /* B has A's local sizes swapped: n x m locally, N x M globally */
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
    ierr = PetscFree(d_nnz);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* copy over the A part: each local row i of A becomes a column of B,
     inserted as (ncol rows aj[..]) x (1 col row) with INSERT_VALUES */
  array = Aloc->a;
  row = A->rmap->rstart;
  for (i=0; i<ma; i++) {
    ncol = ai[i+1]-ai[i];
    ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; aj += ncol;
  }
  /* aj was shifted to global indices above; undo the shift so A is unchanged */
  aj = Aloc->j;
  for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */

  /* copy over the B part: translate compressed off-diagonal column indices
     to global indices through garray before inserting */
  ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
  ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
  array = Bloc->a;
  row = A->rmap->rstart;
  for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    /* in-place transpose: steal B's guts into A's header */
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__
"MatDiagonalScale_MPIAIJ" 1701 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1702 { 1703 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1704 Mat a = aij->A,b = aij->B; 1705 PetscErrorCode ierr; 1706 PetscInt s1,s2,s3; 1707 1708 PetscFunctionBegin; 1709 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1710 if (rr) { 1711 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1712 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1713 /* Overlap communication with computation. */ 1714 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1715 } 1716 if (ll) { 1717 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1718 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1719 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1720 } 1721 /* scale the diagonal block */ 1722 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1723 1724 if (rr) { 1725 /* Do a scatter end and then right scale the off-diagonal block */ 1726 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1727 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1728 } 1729 1730 PetscFunctionReturn(0); 1731 } 1732 1733 #undef __FUNCT__ 1734 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1735 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1736 { 1737 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1738 PetscErrorCode ierr; 1739 1740 PetscFunctionBegin; 1741 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1742 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1743 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 1744 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 1745 PetscFunctionReturn(0); 1746 } 1747 #undef __FUNCT__ 1748 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1749 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1750 { 1751 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1752 PetscErrorCode ierr; 1753 1754 
  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
/*
   MatEqual_MPIAIJ - Tests two parallel AIJ matrices for equality.

   Compares the local diagonal blocks first and only compares the
   off-diagonal blocks when those match; a logical-AND Allreduce then
   combines the per-rank results so all ranks return the same flag.
*/
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  /* PetscTruth is reduced as MPI_INT here; all ranks must take part */
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIAIJ"
/*
   MatCopy_MPIAIJ - Copies A into B. Uses the fast block-by-block path only
   when the nonzero patterns are declared identical and both matrices share
   the same copy implementation; otherwise falls back to MatCopy_Basic().
*/
PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts.
       If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
/*
   MatSetUpPreallocation_MPIAIJ - Default preallocation hook: uses
   PETSC_DEFAULT per-row estimates with no explicit nnz arrays.
*/
PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
/*
   MatAXPY_MPIAIJ - Computes Y = a*X + Y.

   SAME_NONZERO_PATTERN: a single BLAS axpy over each block's value array.
   SUBSET_NONZERO_PATTERN: uses (and caches on Y's off-diagonal block) an
   xtoy index map from X's nonzeros into Y's, rebuilding it only when the
   cached map belongs to a different X. Anything else: MatAXPY_Basic().
*/
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x = (Mat_SeqAIJ *)xx->A->data;
    y = (Mat_SeqAIJ *)yy->A->data;
    bnz = PetscBLASIntCast(x->nz);
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    bnz = PetscBLASIntCast(x->nz);
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
  } else if (str == SUBSET_NONZERO_PATTERN) {
    ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);

    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    /* the cached map is only valid for the X it was built against */
    if (y->xtoy && y->XtoY != xx->B) {
      ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
      ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
    }
    if (!y->xtoy) { /* get xtoy */
      ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
      y->XtoY = xx->B;
      /* keep X's off-diagonal block alive while the map references it */
      ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
    }
1850 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1851 } else { 1852 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1853 } 1854 PetscFunctionReturn(0); 1855 } 1856 1857 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1858 1859 #undef __FUNCT__ 1860 #define __FUNCT__ "MatConjugate_MPIAIJ" 1861 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1862 { 1863 #if defined(PETSC_USE_COMPLEX) 1864 PetscErrorCode ierr; 1865 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1866 1867 PetscFunctionBegin; 1868 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1869 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1870 #else 1871 PetscFunctionBegin; 1872 #endif 1873 PetscFunctionReturn(0); 1874 } 1875 1876 #undef __FUNCT__ 1877 #define __FUNCT__ "MatRealPart_MPIAIJ" 1878 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1879 { 1880 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1881 PetscErrorCode ierr; 1882 1883 PetscFunctionBegin; 1884 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1885 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1886 PetscFunctionReturn(0); 1887 } 1888 1889 #undef __FUNCT__ 1890 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1891 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1892 { 1893 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1894 PetscErrorCode ierr; 1895 1896 PetscFunctionBegin; 1897 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1898 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1899 PetscFunctionReturn(0); 1900 } 1901 1902 #ifdef PETSC_HAVE_PBGL 1903 1904 #include <boost/parallel/mpi/bsp_process_group.hpp> 1905 #include <boost/graph/distributed/ilu_default_graph.hpp> 1906 #include <boost/graph/distributed/ilu_0_block.hpp> 1907 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1908 #include <boost/graph/distributed/petsc/interface.hpp> 1909 #include <boost/multi_array.hpp> 1910 #include <boost/parallel/distributed_property_map->hpp> 1911 1912 #undef __FUNCT__ 1913 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1914 /* 1915 This uses the parallel 
ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
*/
/*
   MatILUFactorSymbolic_MPIAIJ - Symbolic (and here effectively numeric)
   parallel ILU(0) via the Parallel Boost Graph Library. Only levels == 0
   and identity row/column permutations are supported. The PBGL level graph
   is attached to the factored matrix as a PetscContainer named "graph" so
   MatSolve_MPIAIJ can retrieve it.
*/
PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
{
  namespace petsc = boost::distributed::petsc;

  namespace graph_dist = boost::graph::distributed;
  using boost::graph::distributed::ilu_default::process_group_type;
  using boost::graph::ilu_permuted;

  PetscTruth     row_identity, col_identity;
  PetscContainer c;
  PetscInt       m, n, M, N;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
  ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
  ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
  if (!row_identity || !col_identity) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
  }

  /* build the distributed graph from A and run the permuted ILU on it;
     lgraph_p is heap-allocated and ownership passes to the container below
     (NOTE(review): no matching delete is visible in this file — presumably
     the container consumer frees it; confirm) */
  process_group_type pg;
  typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
  lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
  lgraph_type& level_graph = *lgraph_p;
  graph_dist::ilu_default::graph_type& graph(level_graph.graph);

  petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
  ilu_permuted(level_graph);

  /* put together the new matrix */
  ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
  ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
  ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
  ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* error codes were previously assigned to ierr but never checked */
  ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 1959 PetscFunctionReturn(0); 1960 } 1961 1962 #undef __FUNCT__ 1963 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1964 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 1965 { 1966 PetscFunctionBegin; 1967 PetscFunctionReturn(0); 1968 } 1969 1970 #undef __FUNCT__ 1971 #define __FUNCT__ "MatSolve_MPIAIJ" 1972 /* 1973 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1974 */ 1975 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1976 { 1977 namespace graph_dist = boost::graph::distributed; 1978 1979 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1980 lgraph_type* lgraph_p; 1981 PetscContainer c; 1982 PetscErrorCode ierr; 1983 1984 PetscFunctionBegin; 1985 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1986 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1987 ierr = VecCopy(b, x);CHKERRQ(ierr); 1988 1989 PetscScalar* array_x; 1990 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1991 PetscInt sx; 1992 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 1993 1994 PetscScalar* array_b; 1995 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 1996 PetscInt sb; 1997 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 1998 1999 lgraph_type& level_graph = *lgraph_p; 2000 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2001 2002 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2003 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2004 ref_x(array_x, boost::extents[num_vertices(graph)]); 2005 2006 typedef boost::iterator_property_map<array_ref_type::iterator, 2007 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2008 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2009 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2010 2011 ilu_set_solve(*lgraph_p, 
vector_b, vector_x); 2012 2013 PetscFunctionReturn(0); 2014 } 2015 #endif 2016 2017 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2018 PetscInt nzlocal,nsends,nrecvs; 2019 PetscMPIInt *send_rank,*recv_rank; 2020 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2021 PetscScalar *sbuf_a,**rbuf_a; 2022 PetscErrorCode (*MatDestroy)(Mat); 2023 } Mat_Redundant; 2024 2025 #undef __FUNCT__ 2026 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2027 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2028 { 2029 PetscErrorCode ierr; 2030 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2031 PetscInt i; 2032 2033 PetscFunctionBegin; 2034 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2035 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2036 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2037 for (i=0; i<redund->nrecvs; i++){ 2038 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2039 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2040 } 2041 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2042 ierr = PetscFree(redund);CHKERRQ(ierr); 2043 PetscFunctionReturn(0); 2044 } 2045 2046 #undef __FUNCT__ 2047 #define __FUNCT__ "MatDestroy_MatRedundant" 2048 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2049 { 2050 PetscErrorCode ierr; 2051 PetscContainer container; 2052 Mat_Redundant *redund=PETSC_NULL; 2053 2054 PetscFunctionBegin; 2055 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2056 if (container) { 2057 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2058 } else { 2059 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2060 } 2061 A->ops->destroy = redund->MatDestroy; 2062 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2063 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2064 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 2065 
PetscFunctionReturn(0); 2066 } 2067 2068 #undef __FUNCT__ 2069 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2070 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2071 { 2072 PetscMPIInt rank,size; 2073 MPI_Comm comm=((PetscObject)mat)->comm; 2074 PetscErrorCode ierr; 2075 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2076 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2077 PetscInt *rowrange=mat->rmap->range; 2078 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2079 Mat A=aij->A,B=aij->B,C=*matredundant; 2080 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2081 PetscScalar *sbuf_a; 2082 PetscInt nzlocal=a->nz+b->nz; 2083 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2084 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2085 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2086 MatScalar *aworkA,*aworkB; 2087 PetscScalar *vals; 2088 PetscMPIInt tag1,tag2,tag3,imdex; 2089 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2090 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2091 MPI_Status recv_status,*send_status; 2092 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2093 PetscInt **rbuf_j=PETSC_NULL; 2094 PetscScalar **rbuf_a=PETSC_NULL; 2095 Mat_Redundant *redund=PETSC_NULL; 2096 PetscContainer container; 2097 2098 PetscFunctionBegin; 2099 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2100 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2101 2102 if (reuse == MAT_REUSE_MATRIX) { 2103 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2104 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2105 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2106 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong local size"); 2107 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2108 if (container) { 2109 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2110 } else { 2111 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2112 } 2113 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2114 2115 nsends = redund->nsends; 2116 nrecvs = redund->nrecvs; 2117 send_rank = redund->send_rank; 2118 recv_rank = redund->recv_rank; 2119 sbuf_nz = redund->sbuf_nz; 2120 rbuf_nz = redund->rbuf_nz; 2121 sbuf_j = redund->sbuf_j; 2122 sbuf_a = redund->sbuf_a; 2123 rbuf_j = redund->rbuf_j; 2124 rbuf_a = redund->rbuf_a; 2125 } 2126 2127 if (reuse == MAT_INITIAL_MATRIX){ 2128 PetscMPIInt subrank,subsize; 2129 PetscInt nleftover,np_subcomm; 2130 /* get the destination processors' id send_rank, nsends and nrecvs */ 2131 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2132 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2133 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2134 np_subcomm = size/nsubcomm; 2135 nleftover = size - nsubcomm*np_subcomm; 2136 nsends = 0; nrecvs = 0; 2137 for (i=0; i<size; i++){ /* i=rank*/ 2138 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2139 send_rank[nsends] = i; nsends++; 2140 recv_rank[nrecvs++] = i; 2141 } 2142 } 2143 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2144 i = size-nleftover-1; 2145 j = 0; 2146 while (j < nsubcomm - nleftover){ 2147 send_rank[nsends++] = i; 2148 i--; j++; 2149 } 2150 } 2151 2152 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2153 for (i=0; i<nleftover; i++){ 2154 recv_rank[nrecvs++] = size-nleftover+i; 2155 } 2156 } 2157 2158 /* allocate sbuf_j, sbuf_a */ 2159 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2160 
ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2161 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2162 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2163 2164 /* copy mat's local entries into the buffers */ 2165 if (reuse == MAT_INITIAL_MATRIX){ 2166 rownz_max = 0; 2167 rptr = sbuf_j; 2168 cols = sbuf_j + rend-rstart + 1; 2169 vals = sbuf_a; 2170 rptr[0] = 0; 2171 for (i=0; i<rend-rstart; i++){ 2172 row = i + rstart; 2173 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2174 ncols = nzA + nzB; 2175 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2176 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2177 /* load the column indices for this row into cols */ 2178 lwrite = 0; 2179 for (l=0; l<nzB; l++) { 2180 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2181 vals[lwrite] = aworkB[l]; 2182 cols[lwrite++] = ctmp; 2183 } 2184 } 2185 for (l=0; l<nzA; l++){ 2186 vals[lwrite] = aworkA[l]; 2187 cols[lwrite++] = cstart + cworkA[l]; 2188 } 2189 for (l=0; l<nzB; l++) { 2190 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2191 vals[lwrite] = aworkB[l]; 2192 cols[lwrite++] = ctmp; 2193 } 2194 } 2195 vals += ncols; 2196 cols += ncols; 2197 rptr[i+1] = rptr[i] + ncols; 2198 if (rownz_max < ncols) rownz_max = ncols; 2199 } 2200 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2201 } else { /* only copy matrix values into sbuf_a */ 2202 rptr = sbuf_j; 2203 vals = sbuf_a; 2204 rptr[0] = 0; 2205 for (i=0; i<rend-rstart; i++){ 2206 row = i + rstart; 2207 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2208 ncols = nzA + nzB; 2209 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2210 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2211 lwrite = 0; 2212 for (l=0; l<nzB; l++) { 2213 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2214 } 2215 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2216 for (l=0; l<nzB; l++) { 2217 
if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2218 } 2219 vals += ncols; 2220 rptr[i+1] = rptr[i] + ncols; 2221 } 2222 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2223 2224 /* send nzlocal to others, and recv other's nzlocal */ 2225 /*--------------------------------------------------*/ 2226 if (reuse == MAT_INITIAL_MATRIX){ 2227 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2228 s_waits2 = s_waits3 + nsends; 2229 s_waits1 = s_waits2 + nsends; 2230 r_waits1 = s_waits1 + nsends; 2231 r_waits2 = r_waits1 + nrecvs; 2232 r_waits3 = r_waits2 + nrecvs; 2233 } else { 2234 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2235 r_waits3 = s_waits3 + nsends; 2236 } 2237 2238 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2239 if (reuse == MAT_INITIAL_MATRIX){ 2240 /* get new tags to keep the communication clean */ 2241 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2242 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2243 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2244 2245 /* post receives of other's nzlocal */ 2246 for (i=0; i<nrecvs; i++){ 2247 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2248 } 2249 /* send nzlocal to others */ 2250 for (i=0; i<nsends; i++){ 2251 sbuf_nz[i] = nzlocal; 2252 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2253 } 2254 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2255 count = nrecvs; 2256 while (count) { 2257 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2258 recv_rank[imdex] = recv_status.MPI_SOURCE; 2259 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2260 ierr = 
PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2261 2262 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2263 rbuf_nz[imdex] += i + 2; 2264 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2265 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2266 count--; 2267 } 2268 /* wait on sends of nzlocal */ 2269 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2270 /* send mat->i,j to others, and recv from other's */ 2271 /*------------------------------------------------*/ 2272 for (i=0; i<nsends; i++){ 2273 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2274 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2275 } 2276 /* wait on receives of mat->i,j */ 2277 /*------------------------------*/ 2278 count = nrecvs; 2279 while (count) { 2280 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2281 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2282 count--; 2283 } 2284 /* wait on sends of mat->i,j */ 2285 /*---------------------------*/ 2286 if (nsends) { 2287 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2288 } 2289 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2290 2291 /* post receives, send and receive mat->a */ 2292 /*----------------------------------------*/ 2293 for (imdex=0; imdex<nrecvs; imdex++) { 2294 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2295 } 2296 for (i=0; i<nsends; i++){ 2297 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2298 } 2299 count = nrecvs; 2300 while (count) { 2301 ierr = 
MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2302 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2303 count--; 2304 } 2305 if (nsends) { 2306 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2307 } 2308 2309 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2310 2311 /* create redundant matrix */ 2312 /*-------------------------*/ 2313 if (reuse == MAT_INITIAL_MATRIX){ 2314 /* compute rownz_max for preallocation */ 2315 for (imdex=0; imdex<nrecvs; imdex++){ 2316 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2317 rptr = rbuf_j[imdex]; 2318 for (i=0; i<j; i++){ 2319 ncols = rptr[i+1] - rptr[i]; 2320 if (rownz_max < ncols) rownz_max = ncols; 2321 } 2322 } 2323 2324 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2325 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2326 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2327 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2328 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2329 } else { 2330 C = *matredundant; 2331 } 2332 2333 /* insert local matrix entries */ 2334 rptr = sbuf_j; 2335 cols = sbuf_j + rend-rstart + 1; 2336 vals = sbuf_a; 2337 for (i=0; i<rend-rstart; i++){ 2338 row = i + rstart; 2339 ncols = rptr[i+1] - rptr[i]; 2340 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2341 vals += ncols; 2342 cols += ncols; 2343 } 2344 /* insert received matrix entries */ 2345 for (imdex=0; imdex<nrecvs; imdex++){ 2346 rstart = rowrange[recv_rank[imdex]]; 2347 rend = rowrange[recv_rank[imdex]+1]; 2348 rptr = rbuf_j[imdex]; 2349 cols = rbuf_j[imdex] + rend-rstart + 1; 2350 vals = rbuf_a[imdex]; 2351 for (i=0; i<rend-rstart; i++){ 2352 row = i + rstart; 2353 ncols = rptr[i+1] - rptr[i]; 2354 ierr = 
MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      vals += ncols;
      cols += ncols;
    }
  }
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
  if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
  if (reuse == MAT_INITIAL_MATRIX){
    PetscContainer container;
    *matredundant = C;
    /* create a supporting struct and attach it to C for reuse */
    ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
    ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
    ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
    ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);

    /* cache the communication plan and buffers for MAT_REUSE_MATRIX calls */
    redund->nzlocal   = nzlocal;
    redund->nsends    = nsends;
    redund->nrecvs    = nrecvs;
    redund->send_rank = send_rank;
    redund->recv_rank = recv_rank;
    redund->sbuf_nz   = sbuf_nz;
    redund->rbuf_nz   = rbuf_nz;
    redund->sbuf_j    = sbuf_j;
    redund->sbuf_a    = sbuf_a;
    redund->rbuf_j    = rbuf_j;
    redund->rbuf_a    = rbuf_a;

    /* interpose the destroy routine so the cache is freed with C */
    redund->MatDestroy = C->ops->destroy;
    C->ops->destroy    = MatDestroy_MatRedundant;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
/*
   MatGetRowMaxAbs_MPIAIJ - For each local row, returns in v the entry of
   maximum absolute value and (optionally) its global column index in idx[].
   Computes the result for the diagonal block, then merges the off-diagonal
   block's per-row maxima, translating indices through garray.
*/
PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    /* shift the diagonal block's local column indices to global */
    for (i=0; i<A->rmap->n; i++) {
2406 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2407 } 2408 } 2409 2410 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2411 if (idx) { 2412 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2413 } 2414 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2415 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2416 2417 for (i=0; i<A->rmap->n; i++){ 2418 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2419 va[i] = vb[i]; 2420 if (idx) idx[i] = a->garray[idxb[i]]; 2421 } 2422 } 2423 2424 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2425 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2426 if (idxb) { 2427 ierr = PetscFree(idxb);CHKERRQ(ierr); 2428 } 2429 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2430 PetscFunctionReturn(0); 2431 } 2432 2433 #undef __FUNCT__ 2434 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2435 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2436 { 2437 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2438 PetscErrorCode ierr; 2439 PetscInt i,*idxb = 0; 2440 PetscScalar *va,*vb; 2441 Vec vtmp; 2442 2443 PetscFunctionBegin; 2444 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2445 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2446 if (idx) { 2447 for (i=0; i<A->cmap->n; i++) { 2448 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2449 } 2450 } 2451 2452 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2453 if (idx) { 2454 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2455 } 2456 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2457 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2458 2459 for (i=0; i<A->rmap->n; i++){ 2460 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2461 va[i] = vb[i]; 2462 if (idx) idx[i] = a->garray[idxb[i]]; 2463 } 2464 } 2465 2466 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2467 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2468 if (idxb) { 2469 ierr = PetscFree(idxb);CHKERRQ(ierr); 2470 } 2471 ierr = 
VecDestroy(vtmp);CHKERRQ(ierr); 2472 PetscFunctionReturn(0); 2473 } 2474 2475 #undef __FUNCT__ 2476 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2477 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2478 { 2479 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2480 PetscInt n = A->rmap->n; 2481 PetscInt cstart = A->cmap->rstart; 2482 PetscInt *cmap = mat->garray; 2483 PetscInt *diagIdx, *offdiagIdx; 2484 Vec diagV, offdiagV; 2485 PetscScalar *a, *diagA, *offdiagA; 2486 PetscInt r; 2487 PetscErrorCode ierr; 2488 2489 PetscFunctionBegin; 2490 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2491 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2492 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2493 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2494 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2495 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2496 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2497 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2498 for(r = 0; r < n; ++r) { 2499 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2500 a[r] = diagA[r]; 2501 idx[r] = cstart + diagIdx[r]; 2502 } else { 2503 a[r] = offdiagA[r]; 2504 idx[r] = cmap[offdiagIdx[r]]; 2505 } 2506 } 2507 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2508 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2509 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2510 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2511 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2512 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2513 PetscFunctionReturn(0); 2514 } 2515 2516 #undef __FUNCT__ 2517 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2518 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2519 { 2520 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2521 PetscInt n = A->rmap->n; 2522 PetscInt cstart = A->cmap->rstart; 2523 PetscInt *cmap = mat->garray; 2524 PetscInt 
*diagIdx, *offdiagIdx; 2525 Vec diagV, offdiagV; 2526 PetscScalar *a, *diagA, *offdiagA; 2527 PetscInt r; 2528 PetscErrorCode ierr; 2529 2530 PetscFunctionBegin; 2531 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2532 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2533 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2534 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2535 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2536 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2537 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2538 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2539 for(r = 0; r < n; ++r) { 2540 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2541 a[r] = diagA[r]; 2542 idx[r] = cstart + diagIdx[r]; 2543 } else { 2544 a[r] = offdiagA[r]; 2545 idx[r] = cmap[offdiagIdx[r]]; 2546 } 2547 } 2548 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2549 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2550 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2551 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2552 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2553 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2554 PetscFunctionReturn(0); 2555 } 2556 2557 #undef __FUNCT__ 2558 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2559 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat) 2560 { 2561 PetscErrorCode ierr; 2562 Mat *dummy; 2563 2564 PetscFunctionBegin; 2565 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2566 *newmat = *dummy; 2567 ierr = PetscFree(dummy);CHKERRQ(ierr); 2568 PetscFunctionReturn(0); 2569 } 2570 2571 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2572 /* -------------------------------------------------------------------*/ 2573 static struct _MatOps MatOps_Values = 
/* Function table for MATMPIAIJ: positional slots of struct _MatOps; the
   slot-number comments must stay in sync with the _MatOps declaration. */
{MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
/* 4*/ MatMultAdd_MPIAIJ,
       MatMultTranspose_MPIAIJ,
       MatMultTransposeAdd_MPIAIJ,
#ifdef PETSC_HAVE_PBGL
       MatSolve_MPIAIJ,
#else
       0,
#endif
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatSOR_MPIAIJ,
       MatTranspose_MPIAIJ,
/*15*/ MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
/*20*/ MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
/*24*/ MatZeroRows_MPIAIJ,
       0,
#ifdef PETSC_HAVE_PBGL
       0,
#else
       0,
#endif
       0,
       0,
/*29*/ MatSetUpPreallocation_MPIAIJ,
#ifdef PETSC_HAVE_PBGL
       0,
#else
       0,
#endif
       0,
       0,
       0,
/*34*/ MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
/*39*/ MatAXPY_MPIAIJ,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
/*44*/ MatGetRowMax_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       0,
/*49*/ MatSetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
/*54*/ MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       MatPermute_MPIAIJ,
       0,
/*59*/ MatGetSubMatrix_MPIAIJ,
       MatDestroy_MPIAIJ,
       MatView_MPIAIJ,
       0,
       0,
/*64*/ 0,
       0,
       0,
       0,
       0,
/*69*/ MatGetRowMaxAbs_MPIAIJ,
       MatGetRowMinAbs_MPIAIJ,
       0,
       MatSetColoring_MPIAIJ,
#if defined(PETSC_HAVE_ADIC)
       MatSetValuesAdic_MPIAIJ,
#else
       0,
#endif
       MatSetValuesAdifor_MPIAIJ,
/*75*/ MatFDColoringApply_AIJ,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
/*83*/ MatLoad_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
/*89*/ MatMatMult_MPIAIJ_MPIAIJ,
       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
       MatMatMultNumeric_MPIAIJ_MPIAIJ,
       MatPtAP_Basic,
       MatPtAPSymbolic_MPIAIJ,
/*94*/ MatPtAPNumeric_MPIAIJ,
       0,
       0,
       0,
       0,
/*99*/ 0,
       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
       MatPtAPNumeric_MPIAIJ_MPIAIJ,
       MatConjugate_MPIAIJ,
       0,
/*104*/MatSetValuesRow_MPIAIJ,
       MatRealPart_MPIAIJ,
       MatImaginaryPart_MPIAIJ,
       0,
       0,
/*109*/0,
       MatGetRedundantMatrix_MPIAIJ,
       MatGetRowMin_MPIAIJ,
       0,
       0,
/*114*/MatGetSeqNonzerostructure_MPIAIJ,
       0,
       0,
       0,
       0,
/*119*/0,
       0,
       0,
       0,
       MatLoadnew_MPIAIJ
};

/* ----------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIAIJ"
/* Saves the numerical values of both the diagonal (A) and off-diagonal (B)
   sequential blocks so they can later be restored with MatRetrieveValues(). */
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIAIJ"
/* Restores the numerical values of both blocks previously saved by
   MatStoreValues_MPIAIJ(). */
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#include "petscpc.h"
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
/*
   MatMPIAIJSetPreallocation_MPIAIJ - Preallocates the diagonal (A) and
   off-diagonal (B) sequential blocks of an MPIAIJ matrix.

   d_nz/o_nz   - default per-row counts for the diagonal/off-diagonal block
   d_nnz/o_nnz - optional per-row counts (must be non-negative if given)
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  /* fall back to fixed default counts when the caller did not choose */
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz =
5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);

  ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  /* validate optional per-row counts before touching any state */
  if (d_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
    }
  }
  b = (Mat_MPIAIJ*)B->data;

  if (!B->preallocated) {
    /* Explicitly create 2 MATSEQAIJ matrices.
       NOTE(review): b->B is created with the full global column width
       (B->cmap->N) -- presumably compacted elsewhere during assembly;
       confirm before relying on its sizes. */
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
    ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
  }

  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
/*
   MatDuplicate_MPIAIJ - Creates a new MPIAIJ matrix with the same layout as
   matin; values are copied or not according to cpvalues. Copies the function
   table, layouts, column map, garray, the local blocks A and B, and the
   scatter context.
*/
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  /* copy the whole ops table so any overridden entries carry over */
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a = (Mat_MPIAIJ*)mat->data;

  mat->factortype   = matin->factortype;
  mat->rmap->bs     = matin->rmap->bs;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;          /* per-call MatGetRow() scratch: not copied */
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr =
PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2830 2831 if (oldmat->colmap) { 2832 #if defined (PETSC_USE_CTABLE) 2833 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2834 #else 2835 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2836 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2837 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2838 #endif 2839 } else a->colmap = 0; 2840 if (oldmat->garray) { 2841 PetscInt len; 2842 len = oldmat->B->cmap->n; 2843 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2844 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2845 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2846 } else a->garray = 0; 2847 2848 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2849 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2850 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2851 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2852 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2853 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2854 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2855 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2856 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2857 *newmat = mat; 2858 PetscFunctionReturn(0); 2859 } 2860 2861 #undef __FUNCT__ 2862 #define __FUNCT__ "MatLoad_MPIAIJ" 2863 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 2864 { 2865 Mat A; 2866 PetscScalar *vals,*svals; 2867 MPI_Comm comm = ((PetscObject)viewer)->comm; 2868 MPI_Status status; 2869 PetscErrorCode ierr; 2870 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz; 2871 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2872 PetscInt header[4],*rowlengths = 
0,M,N,m,*cols; 2873 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2874 PetscInt cend,cstart,n,*rowners; 2875 int fd; 2876 2877 PetscFunctionBegin; 2878 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2879 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2880 if (!rank) { 2881 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2882 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2883 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2884 } 2885 2886 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2887 M = header[1]; N = header[2]; 2888 /* determine ownership of all rows */ 2889 m = M/size + ((M % size) > rank); 2890 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2891 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2892 2893 /* First process needs enough room for process with most rows */ 2894 if (!rank) { 2895 mmax = rowners[1]; 2896 for (i=2; i<size; i++) { 2897 mmax = PetscMax(mmax,rowners[i]); 2898 } 2899 } else mmax = m; 2900 2901 rowners[0] = 0; 2902 for (i=2; i<=size; i++) { 2903 rowners[i] += rowners[i-1]; 2904 } 2905 rstart = rowners[rank]; 2906 rend = rowners[rank+1]; 2907 2908 /* distribute row lengths to all processors */ 2909 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2910 if (!rank) { 2911 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2912 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2913 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2914 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2915 for (j=0; j<m; j++) { 2916 procsnz[0] += ourlens[j]; 2917 } 2918 for (i=1; i<size; i++) { 2919 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2920 /* calculate the number of nonzeros on each processor */ 2921 for 
(j=0; j<rowners[i+1]-rowners[i]; j++) { 2922 procsnz[i] += rowlengths[j]; 2923 } 2924 mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]); 2925 ierr = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2926 } 2927 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2928 } else { 2929 mpicnt = PetscMPIIntCast(m);CHKERRQ(ierr); 2930 ierr = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2931 } 2932 2933 if (!rank) { 2934 /* determine max buffer needed and allocate it */ 2935 maxnz = 0; 2936 for (i=0; i<size; i++) { 2937 maxnz = PetscMax(maxnz,procsnz[i]); 2938 } 2939 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2940 2941 /* read in my part of the matrix column indices */ 2942 nz = procsnz[0]; 2943 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2944 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2945 2946 /* read in every one elses and ship off */ 2947 for (i=1; i<size; i++) { 2948 nz = procsnz[i]; 2949 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2950 mpicnt = PetscMPIIntCast(nz); 2951 ierr = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2952 } 2953 ierr = PetscFree(cols);CHKERRQ(ierr); 2954 } else { 2955 /* determine buffer space needed for message */ 2956 nz = 0; 2957 for (i=0; i<m; i++) { 2958 nz += ourlens[i]; 2959 } 2960 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2961 2962 /* receive message of column indices*/ 2963 mpicnt = PetscMPIIntCast(nz);CHKERRQ(ierr); 2964 ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2965 ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr); 2966 if (mpimaxnz == MPI_UNDEFINED) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt); 2967 else if (mpimaxnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt); 2968 else if (mpimaxnz != mpicnt) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz); 2969 } 2970 2971 /* determine column ownership if matrix is not square */ 2972 if (N != M) { 2973 n = N/size + ((N % size) > rank); 2974 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2975 cstart = cend - n; 2976 } else { 2977 cstart = rstart; 2978 cend = rend; 2979 n = cend - cstart; 2980 } 2981 2982 /* loop over local rows, determining number of off diagonal entries */ 2983 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2984 jj = 0; 2985 for (i=0; i<m; i++) { 2986 for (j=0; j<ourlens[i]; j++) { 2987 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2988 jj++; 2989 } 2990 } 2991 2992 /* create our matrix */ 2993 for (i=0; i<m; i++) { 2994 ourlens[i] -= offlens[i]; 2995 } 2996 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2997 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2998 ierr = MatSetType(A,type);CHKERRQ(ierr); 2999 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 3000 3001 for (i=0; i<m; i++) { 3002 ourlens[i] += offlens[i]; 3003 } 3004 3005 if (!rank) { 3006 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3007 3008 /* read in my part of the matrix numerical values */ 3009 nz = procsnz[0]; 3010 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3011 3012 /* insert into matrix */ 3013 jj = rstart; 3014 smycols = mycols; 3015 svals = vals; 3016 for (i=0; i<m; i++) { 3017 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3018 smycols += ourlens[i]; 3019 svals += ourlens[i]; 3020 jj++; 3021 } 3022 3023 /* read in other processors and ship out */ 3024 for (i=1; i<size; i++) { 3025 nz = procsnz[i]; 3026 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3027 mpicnt = PetscMPIIntCast(nz); 3028 ierr = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 3029 } 3030 ierr = 
PetscFree(procsnz);CHKERRQ(ierr); 3031 } else { 3032 /* receive numeric values */ 3033 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3034 3035 /* receive message of values*/ 3036 mpicnt = PetscMPIIntCast(nz); 3037 ierr = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 3038 ierr = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr); 3039 if (mpimaxnz == MPI_UNDEFINED) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt); 3040 else if (mpimaxnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt); 3041 else if (mpimaxnz != mpicnt) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz); 3042 3043 /* insert into matrix */ 3044 jj = rstart; 3045 smycols = mycols; 3046 svals = vals; 3047 for (i=0; i<m; i++) { 3048 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3049 smycols += ourlens[i]; 3050 svals += ourlens[i]; 3051 jj++; 3052 } 3053 } 3054 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3055 ierr = PetscFree(vals);CHKERRQ(ierr); 3056 ierr = PetscFree(mycols);CHKERRQ(ierr); 3057 ierr = PetscFree(rowners);CHKERRQ(ierr); 3058 3059 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3060 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3061 *newmat = A; 3062 PetscFunctionReturn(0); 3063 } 3064 3065 #undef __FUNCT__ 3066 #define __FUNCT__ "MatLoadnew_MPIAIJ" 3067 PetscErrorCode MatLoadnew_MPIAIJ(PetscViewer viewer, Mat newMat) 3068 { 3069 PetscScalar *vals,*svals; 3070 MPI_Comm comm = ((PetscObject)viewer)->comm; 3071 MPI_Status status; 3072 PetscErrorCode ierr; 3073 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz; 3074 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3075 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3076 
PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3077 PetscInt cend,cstart,n,*rowners,sizesset=1; 3078 int fd; 3079 3080 PetscFunctionBegin; 3081 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3082 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3083 if (!rank) { 3084 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3085 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3086 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3087 } 3088 3089 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3090 3091 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3092 M = header[1]; N = header[2]; 3093 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3094 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3095 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3096 3097 /* If global sizes are set, check if they are consistent with that given in the file */ 3098 if (sizesset) { 3099 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3100 } 3101 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Incostintent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3102 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Incostintent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3103 3104 /* determine ownership of all rows */ 3105 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 3106 else m = newMat->rmap->n; 3107 3108 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3109 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3110 3111 /* First process needs enough room for process with most rows */ 3112 if (!rank) { 3113 mmax 
= rowners[1]; 3114 for (i=2; i<size; i++) { 3115 mmax = PetscMax(mmax,rowners[i]); 3116 } 3117 } else mmax = m; 3118 3119 rowners[0] = 0; 3120 for (i=2; i<=size; i++) { 3121 rowners[i] += rowners[i-1]; 3122 } 3123 rstart = rowners[rank]; 3124 rend = rowners[rank+1]; 3125 3126 /* distribute row lengths to all processors */ 3127 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3128 if (!rank) { 3129 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3130 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3131 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3132 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3133 for (j=0; j<m; j++) { 3134 procsnz[0] += ourlens[j]; 3135 } 3136 for (i=1; i<size; i++) { 3137 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3138 /* calculate the number of nonzeros on each processor */ 3139 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3140 procsnz[i] += rowlengths[j]; 3141 } 3142 mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]); 3143 ierr = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3144 } 3145 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3146 } else { 3147 mpicnt = PetscMPIIntCast(m);CHKERRQ(ierr); 3148 ierr = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3149 } 3150 3151 if (!rank) { 3152 /* determine max buffer needed and allocate it */ 3153 maxnz = 0; 3154 for (i=0; i<size; i++) { 3155 maxnz = PetscMax(maxnz,procsnz[i]); 3156 } 3157 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3158 3159 /* read in my part of the matrix column indices */ 3160 nz = procsnz[0]; 3161 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3162 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3163 3164 /* read in every one elses and ship off */ 3165 for (i=1; i<size; i++) { 3166 nz = procsnz[i]; 3167 ierr = 
PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3168 mpicnt = PetscMPIIntCast(nz); 3169 ierr = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3170 } 3171 ierr = PetscFree(cols);CHKERRQ(ierr); 3172 } else { 3173 /* determine buffer space needed for message */ 3174 nz = 0; 3175 for (i=0; i<m; i++) { 3176 nz += ourlens[i]; 3177 } 3178 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3179 3180 /* receive message of column indices*/ 3181 mpicnt = PetscMPIIntCast(nz);CHKERRQ(ierr); 3182 ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3183 ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr); 3184 if (mpimaxnz == MPI_UNDEFINED) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt); 3185 else if (mpimaxnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt); 3186 else if (mpimaxnz != mpicnt) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz); 3187 } 3188 3189 /* determine column ownership if matrix is not square */ 3190 if (N != M) { 3191 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3192 else n = newMat->cmap->n; 3193 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3194 cstart = cend - n; 3195 } else { 3196 cstart = rstart; 3197 cend = rend; 3198 n = cend - cstart; 3199 } 3200 3201 /* loop over local rows, determining number of off diagonal entries */ 3202 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3203 jj = 0; 3204 for (i=0; i<m; i++) { 3205 for (j=0; j<ourlens[i]; j++) { 3206 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3207 jj++; 3208 } 3209 } 3210 3211 /* create our matrix */ 3212 for (i=0; i<m; i++) { 3213 ourlens[i] -= offlens[i]; 3214 } 3215 if (!sizesset) { 3216 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3217 } 3218 ierr = 
MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3219 3220 for (i=0; i<m; i++) { 3221 ourlens[i] += offlens[i]; 3222 } 3223 3224 if (!rank) { 3225 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3226 3227 /* read in my part of the matrix numerical values */ 3228 nz = procsnz[0]; 3229 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3230 3231 /* insert into matrix */ 3232 jj = rstart; 3233 smycols = mycols; 3234 svals = vals; 3235 for (i=0; i<m; i++) { 3236 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3237 smycols += ourlens[i]; 3238 svals += ourlens[i]; 3239 jj++; 3240 } 3241 3242 /* read in other processors and ship out */ 3243 for (i=1; i<size; i++) { 3244 nz = procsnz[i]; 3245 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3246 mpicnt = PetscMPIIntCast(nz); 3247 ierr = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3248 } 3249 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3250 } else { 3251 /* receive numeric values */ 3252 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3253 3254 /* receive message of values*/ 3255 mpicnt = PetscMPIIntCast(nz); 3256 ierr = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm,&status);CHKERRQ(ierr); 3257 ierr = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr); 3258 if (mpimaxnz == MPI_UNDEFINED) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt); 3259 else if (mpimaxnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt); 3260 else if (mpimaxnz != mpicnt) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz); 3261 3262 /* insert into matrix */ 3263 jj = rstart; 3264 smycols = mycols; 3265 svals = vals; 3266 for (i=0; i<m; i++) { 3267 ierr = 
MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3268 smycols += ourlens[i]; 3269 svals += ourlens[i]; 3270 jj++; 3271 } 3272 } 3273 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3274 ierr = PetscFree(vals);CHKERRQ(ierr); 3275 ierr = PetscFree(mycols);CHKERRQ(ierr); 3276 ierr = PetscFree(rowners);CHKERRQ(ierr); 3277 3278 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3279 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3280 PetscFunctionReturn(0); 3281 } 3282 3283 #undef __FUNCT__ 3284 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3285 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3286 { 3287 PetscErrorCode ierr; 3288 IS iscol_local; 3289 PetscInt csize; 3290 3291 PetscFunctionBegin; 3292 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3293 if (call == MAT_REUSE_MATRIX) { 3294 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3295 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3296 } else { 3297 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3298 } 3299 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3300 if (call == MAT_INITIAL_MATRIX) { 3301 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3302 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 3303 } 3304 PetscFunctionReturn(0); 3305 } 3306 3307 #undef __FUNCT__ 3308 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3309 /* 3310 Not great since it makes two copies of the submatrix, first an SeqAIJ 3311 in local and then by concatenating the local matrices the end result. 3312 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3313 3314 Note: This requires a sequential iscol with all indices. 
3315 */ 3316 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3317 { 3318 PetscErrorCode ierr; 3319 PetscMPIInt rank,size; 3320 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3321 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3322 Mat *local,M,Mreuse; 3323 MatScalar *vwork,*aa; 3324 MPI_Comm comm = ((PetscObject)mat)->comm; 3325 Mat_SeqAIJ *aij; 3326 3327 3328 PetscFunctionBegin; 3329 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3330 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3331 3332 if (call == MAT_REUSE_MATRIX) { 3333 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3334 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3335 local = &Mreuse; 3336 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3337 } else { 3338 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3339 Mreuse = *local; 3340 ierr = PetscFree(local);CHKERRQ(ierr); 3341 } 3342 3343 /* 3344 m - number of local rows 3345 n - number of columns (same on all processors) 3346 rstart - first row in new global matrix generated 3347 */ 3348 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3349 if (call == MAT_INITIAL_MATRIX) { 3350 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3351 ii = aij->i; 3352 jj = aij->j; 3353 3354 /* 3355 Determine the number of non-zeros in the diagonal and off-diagonal 3356 portions of the matrix in order to do correct preallocation 3357 */ 3358 3359 /* first get start and end of "diagonal" columns */ 3360 if (csize == PETSC_DECIDE) { 3361 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3362 if (mglobal == n) { /* square matrix */ 3363 nlocal = m; 3364 } else { 3365 nlocal = n/size + ((n % size) > rank); 3366 } 3367 } else { 3368 nlocal = csize; 3369 } 3370 ierr = 
MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3371 rstart = rend - nlocal; 3372 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3373 3374 /* next, compute all the lengths */ 3375 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3376 olens = dlens + m; 3377 for (i=0; i<m; i++) { 3378 jend = ii[i+1] - ii[i]; 3379 olen = 0; 3380 dlen = 0; 3381 for (j=0; j<jend; j++) { 3382 if (*jj < rstart || *jj >= rend) olen++; 3383 else dlen++; 3384 jj++; 3385 } 3386 olens[i] = olen; 3387 dlens[i] = dlen; 3388 } 3389 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3390 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3391 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3392 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3393 ierr = PetscFree(dlens);CHKERRQ(ierr); 3394 } else { 3395 PetscInt ml,nl; 3396 3397 M = *newmat; 3398 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3399 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3400 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3401 /* 3402 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3403 rather than the slower MatSetValues(). 
3404 */ 3405 M->was_assembled = PETSC_TRUE; 3406 M->assembled = PETSC_FALSE; 3407 } 3408 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3409 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3410 ii = aij->i; 3411 jj = aij->j; 3412 aa = aij->a; 3413 for (i=0; i<m; i++) { 3414 row = rstart + i; 3415 nz = ii[i+1] - ii[i]; 3416 cwork = jj; jj += nz; 3417 vwork = aa; aa += nz; 3418 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3419 } 3420 3421 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3422 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3423 *newmat = M; 3424 3425 /* save submatrix used in processor for next request */ 3426 if (call == MAT_INITIAL_MATRIX) { 3427 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3428 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 3429 } 3430 3431 PetscFunctionReturn(0); 3432 } 3433 3434 EXTERN_C_BEGIN 3435 #undef __FUNCT__ 3436 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3437 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3438 { 3439 PetscInt m,cstart, cend,j,nnz,i,d; 3440 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3441 const PetscInt *JJ; 3442 PetscScalar *values; 3443 PetscErrorCode ierr; 3444 3445 PetscFunctionBegin; 3446 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3447 3448 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3449 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3450 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3451 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3452 m = B->rmap->n; 3453 cstart = B->cmap->rstart; 3454 cend = B->cmap->rend; 3455 rstart = B->rmap->rstart; 3456 3457 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3458 3459 #if defined(PETSC_USE_DEBUGGING) 3460 for (i=0; i<m; i++) { 3461 
nnz = Ii[i+1]- Ii[i]; 3462 JJ = J + Ii[i]; 3463 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3464 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3465 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3466 } 3467 #endif 3468 3469 for (i=0; i<m; i++) { 3470 nnz = Ii[i+1]- Ii[i]; 3471 JJ = J + Ii[i]; 3472 nnz_max = PetscMax(nnz_max,nnz); 3473 d = 0; 3474 for (j=0; j<nnz; j++) { 3475 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3476 } 3477 d_nnz[i] = d; 3478 o_nnz[i] = nnz - d; 3479 } 3480 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3481 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3482 3483 if (v) values = (PetscScalar*)v; 3484 else { 3485 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3486 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3487 } 3488 3489 for (i=0; i<m; i++) { 3490 ii = i + rstart; 3491 nnz = Ii[i+1]- Ii[i]; 3492 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3493 } 3494 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3495 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3496 3497 if (!v) { 3498 ierr = PetscFree(values);CHKERRQ(ierr); 3499 } 3500 PetscFunctionReturn(0); 3501 } 3502 EXTERN_C_END 3503 3504 #undef __FUNCT__ 3505 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3506 /*@ 3507 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3508 (the default parallel PETSc format). 3509 3510 Collective on MPI_Comm 3511 3512 Input Parameters: 3513 + B - the matrix 3514 . i - the indices into j for the start of each local row (starts with zero) 3515 . 
j - the column indices for each local row (starts with zero)
-  v - optional values in the matrix

   Level: developer

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

       The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering.. i.e for the following matrix, the input data expected is
    as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i = {0,1,3}   [size = nrow+1 = 2+1]
        j = {0,0,2}   [size = nz = 3]
        v = {1,2,3}   [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i = {0,3}     [size = nrow+1 = 1+1]
        j = {0,1,2}   [size = nz = 3]
        v = {4,5,6}   [size = nz = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
          MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);

  PetscFunctionBegin;
  /* dispatch to the implementation registered by the concrete matrix type;
     a matrix type with no registered routine silently ignores this call */
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3567 (the default parallel PETSc format). For good matrix assembly performance 3568 the user should preallocate the matrix storage by setting the parameters 3569 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3570 performance can be increased by more than a factor of 50. 3571 3572 Collective on MPI_Comm 3573 3574 Input Parameters: 3575 + A - the matrix 3576 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3577 (same value is used for all local rows) 3578 . d_nnz - array containing the number of nonzeros in the various rows of the 3579 DIAGONAL portion of the local submatrix (possibly different for each row) 3580 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3581 The size of this array is equal to the number of local rows, i.e 'm'. 3582 You must leave room for the diagonal entry even if it is zero. 3583 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3584 submatrix (same value is used for all local rows). 3585 - o_nnz - array containing the number of nonzeros in the various rows of the 3586 OFF-DIAGONAL portion of the local submatrix (possibly different for 3587 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3588 structure. The size of this array is equal to the number 3589 of local rows, i.e 'm'. 3590 3591 If the *_nnz parameter is given then the *_nz parameter is ignored 3592 3593 The AIJ format (also called the Yale sparse matrix format or 3594 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3595 storage. The stored row and column indices begin with zero. See the users manual for details. 3596 3597 The parallel matrix is partitioned such that the first m0 rows belong to 3598 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3599 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 
3600 3601 The DIAGONAL portion of the local submatrix of a processor can be defined 3602 as the submatrix which is obtained by extraction the part corresponding 3603 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 3604 first row that belongs to the processor, and r2 is the last row belonging 3605 to the this processor. This is a square mxm matrix. The remaining portion 3606 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 3607 3608 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3609 3610 You can call MatGetInfo() to get information on how effective the preallocation was; 3611 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3612 You can also run with the option -info and look for messages with the string 3613 malloc in them to see if additional memory allocation was needed. 3614 3615 Example usage: 3616 3617 Consider the following 8x8 matrix with 34 non-zero values, that is 3618 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3619 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3620 as follows: 3621 3622 .vb 3623 1 2 0 | 0 3 0 | 0 4 3624 Proc0 0 5 6 | 7 0 0 | 8 0 3625 9 0 10 | 11 0 0 | 12 0 3626 ------------------------------------- 3627 13 0 14 | 15 16 17 | 0 0 3628 Proc1 0 18 0 | 19 20 21 | 0 0 3629 0 0 0 | 22 23 0 | 24 0 3630 ------------------------------------- 3631 Proc2 25 26 27 | 0 0 28 | 29 0 3632 30 0 0 | 31 32 33 | 0 34 3633 .ve 3634 3635 This can be represented as a collection of submatrices as: 3636 3637 .vb 3638 A B C 3639 D E F 3640 G H I 3641 .ve 3642 3643 Where the submatrices A,B,C are owned by proc0, D,E,F are 3644 owned by proc1, G,H,I are owned by proc2. 3645 3646 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3647 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3648 The 'M','N' parameters are 8,8, and have the same values on all procs. 

     The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
     submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
     corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
     Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
     part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
     matrix, and [DF] as another SeqAIJ matrix.

     When d_nz, o_nz parameters are specified, d_nz storage elements are
     allocated for every row of the local diagonal submatrix, and o_nz
     storage locations are allocated for every row of the OFF-DIAGONAL submat.
     One way to choose d_nz and o_nz is to use the max nonzeros per local
     row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
     In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
     We are allocating m*(d_nz+o_nz) storage locations for every proc. This
     translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
     for proc2. i.e. we are using 12+15+10=37 storage locations to store
     34 values.

     When d_nnz, o_nnz parameters are specified, the storage is specified
     for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
     In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
     Here the space allocated is the sum of all the above values, i.e. 34, and
     hence pre-allocation is perfect.
3683 3684 Level: intermediate 3685 3686 .keywords: matrix, aij, compressed row, sparse, parallel 3687 3688 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3689 MPIAIJ, MatGetInfo() 3690 @*/ 3691 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3692 { 3693 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3694 3695 PetscFunctionBegin; 3696 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3697 if (f) { 3698 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3699 } 3700 PetscFunctionReturn(0); 3701 } 3702 3703 #undef __FUNCT__ 3704 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3705 /*@ 3706 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3707 CSR format the local rows. 3708 3709 Collective on MPI_Comm 3710 3711 Input Parameters: 3712 + comm - MPI communicator 3713 . m - number of local rows (Cannot be PETSC_DECIDE) 3714 . n - This value should be the same as the local size used in creating the 3715 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3716 calculated if N is given) For square matrices n is almost always m. 3717 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3718 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3719 . i - row indices 3720 . j - column indices 3721 - a - matrix values 3722 3723 Output Parameter: 3724 . mat - the matrix 3725 3726 Level: intermediate 3727 3728 Notes: 3729 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3730 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3731 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 
3732 3733 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3734 3735 The format which is used for the sparse matrix input, is equivalent to a 3736 row-major ordering.. i.e for the following matrix, the input data expected is 3737 as shown: 3738 3739 1 0 0 3740 2 0 3 P0 3741 ------- 3742 4 5 6 P1 3743 3744 Process0 [P0]: rows_owned=[0,1] 3745 i = {0,1,3} [size = nrow+1 = 2+1] 3746 j = {0,0,2} [size = nz = 6] 3747 v = {1,2,3} [size = nz = 6] 3748 3749 Process1 [P1]: rows_owned=[2] 3750 i = {0,3} [size = nrow+1 = 1+1] 3751 j = {0,1,2} [size = nz = 6] 3752 v = {4,5,6} [size = nz = 6] 3753 3754 .keywords: matrix, aij, compressed row, sparse, parallel 3755 3756 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3757 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3758 @*/ 3759 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3760 { 3761 PetscErrorCode ierr; 3762 3763 PetscFunctionBegin; 3764 if (i[0]) { 3765 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3766 } 3767 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3768 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3769 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3770 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3771 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3772 PetscFunctionReturn(0); 3773 } 3774 3775 #undef __FUNCT__ 3776 #define __FUNCT__ "MatCreateMPIAIJ" 3777 /*@C 3778 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3779 (the default parallel PETSc format). 
For good matrix assembly performance 3780 the user should preallocate the matrix storage by setting the parameters 3781 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3782 performance can be increased by more than a factor of 50. 3783 3784 Collective on MPI_Comm 3785 3786 Input Parameters: 3787 + comm - MPI communicator 3788 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3789 This value should be the same as the local size used in creating the 3790 y vector for the matrix-vector product y = Ax. 3791 . n - This value should be the same as the local size used in creating the 3792 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3793 calculated if N is given) For square matrices n is almost always m. 3794 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3795 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3796 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3797 (same value is used for all local rows) 3798 . d_nnz - array containing the number of nonzeros in the various rows of the 3799 DIAGONAL portion of the local submatrix (possibly different for each row) 3800 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3801 The size of this array is equal to the number of local rows, i.e 'm'. 3802 You must leave room for the diagonal entry even if it is zero. 3803 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3804 submatrix (same value is used for all local rows). 3805 - o_nnz - array containing the number of nonzeros in the various rows of the 3806 OFF-DIAGONAL portion of the local submatrix (possibly different for 3807 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3808 structure. The size of this array is equal to the number 3809 of local rows, i.e 'm'. 3810 3811 Output Parameter: 3812 . 
A - the matrix 3813 3814 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3815 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3816 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3817 3818 Notes: 3819 If the *_nnz parameter is given then the *_nz parameter is ignored 3820 3821 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3822 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3823 storage requirements for this matrix. 3824 3825 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3826 processor than it must be used on all processors that share the object for 3827 that argument. 3828 3829 The user MUST specify either the local or global matrix dimensions 3830 (possibly both). 3831 3832 The parallel matrix is partitioned across processors such that the 3833 first m0 rows belong to process 0, the next m1 rows belong to 3834 process 1, the next m2 rows belong to process 2 etc.. where 3835 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3836 values corresponding to [m x N] submatrix. 3837 3838 The columns are logically partitioned with the n0 columns belonging 3839 to 0th partition, the next n1 columns belonging to the next 3840 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 3841 3842 The DIAGONAL portion of the local submatrix on any given processor 3843 is the submatrix corresponding to the rows and columns m,n 3844 corresponding to the given processor. i.e diagonal matrix on 3845 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3846 etc. The remaining portion of the local submatrix [m x (N-n)] 3847 constitute the OFF-DIAGONAL portion. The example below better 3848 illustrates this concept. 
3849 3850 For a square global matrix we define each processor's diagonal portion 3851 to be its local rows and the corresponding columns (a square submatrix); 3852 each processor's off-diagonal portion encompasses the remainder of the 3853 local matrix (a rectangular submatrix). 3854 3855 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3856 3857 When calling this routine with a single process communicator, a matrix of 3858 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3859 type of communicator, use the construction mechanism: 3860 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3861 3862 By default, this format uses inodes (identical nodes) when possible. 3863 We search for consecutive rows with the same nonzero structure, thereby 3864 reusing matrix information to achieve increased efficiency. 3865 3866 Options Database Keys: 3867 + -mat_no_inode - Do not use inodes 3868 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3869 - -mat_aij_oneindex - Internally use indexing starting at 1 3870 rather than 0. Note that when calling MatSetValues(), 3871 the user still MUST index entries starting at 0! 3872 3873 3874 Example usage: 3875 3876 Consider the following 8x8 matrix with 34 non-zero values, that is 3877 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3878 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 3879 as follows: 3880 3881 .vb 3882 1 2 0 | 0 3 0 | 0 4 3883 Proc0 0 5 6 | 7 0 0 | 8 0 3884 9 0 10 | 11 0 0 | 12 0 3885 ------------------------------------- 3886 13 0 14 | 15 16 17 | 0 0 3887 Proc1 0 18 0 | 19 20 21 | 0 0 3888 0 0 0 | 22 23 0 | 24 0 3889 ------------------------------------- 3890 Proc2 25 26 27 | 0 0 28 | 29 0 3891 30 0 0 | 31 32 33 | 0 34 3892 .ve 3893 3894 This can be represented as a collection of submatrices as: 3895 3896 .vb 3897 A B C 3898 D E F 3899 G H I 3900 .ve 3901 3902 Where the submatrices A,B,C are owned by proc0, D,E,F are 3903 owned by proc1, G,H,I are owned by proc2. 3904 3905 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3906 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3907 The 'M','N' parameters are 8,8, and have the same values on all procs. 3908 3909 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3910 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3911 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3912 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3913 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3914 matrix, ans [DF] as another SeqAIJ matrix. 3915 3916 When d_nz, o_nz parameters are specified, d_nz storage elements are 3917 allocated for every row of the local diagonal submatrix, and o_nz 3918 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3919 One way to choose d_nz and o_nz is to use the max nonzerors per local 3920 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3921 In this case, the values of d_nz,o_nz are: 3922 .vb 3923 proc0 : dnz = 2, o_nz = 2 3924 proc1 : dnz = 3, o_nz = 2 3925 proc2 : dnz = 1, o_nz = 4 3926 .ve 3927 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3928 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3929 for proc3. 
     i.e. we are using 12+15+10=37 storage locations to store
     34 values.

     When d_nnz, o_nnz parameters are specified, the storage is specified
     for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
     In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
     Here the space allocated is sum of all the above values i.e. 34, and
     hence pre-allocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* on a single-process communicator a SEQAIJ matrix is returned instead of MPIAIJ
     (documented behavior above); o_nz/o_nnz are meaningless and ignored in that case */
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
/* Returns the diagonal (Ad) and off-diagonal (Ao) SeqAIJ blocks of an MPIAIJ
   matrix plus the garray map from Ao's compressed column numbering to global
   columns. The caller does NOT own the returned objects. */
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__
"MatSetColoring_MPIAIJ" 3984 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3985 { 3986 PetscErrorCode ierr; 3987 PetscInt i; 3988 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3989 3990 PetscFunctionBegin; 3991 if (coloring->ctype == IS_COLORING_GLOBAL) { 3992 ISColoringValue *allcolors,*colors; 3993 ISColoring ocoloring; 3994 3995 /* set coloring for diagonal portion */ 3996 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3997 3998 /* set coloring for off-diagonal portion */ 3999 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4000 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4001 for (i=0; i<a->B->cmap->n; i++) { 4002 colors[i] = allcolors[a->garray[i]]; 4003 } 4004 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4005 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4006 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4007 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 4008 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4009 ISColoringValue *colors; 4010 PetscInt *larray; 4011 ISColoring ocoloring; 4012 4013 /* set coloring for diagonal portion */ 4014 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4015 for (i=0; i<a->A->cmap->n; i++) { 4016 larray[i] = i + A->cmap->rstart; 4017 } 4018 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4019 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4020 for (i=0; i<a->A->cmap->n; i++) { 4021 colors[i] = coloring->colors[larray[i]]; 4022 } 4023 ierr = PetscFree(larray);CHKERRQ(ierr); 4024 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4025 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4026 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 
4027 4028 /* set coloring for off-diagonal portion */ 4029 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4030 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4031 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4032 for (i=0; i<a->B->cmap->n; i++) { 4033 colors[i] = coloring->colors[larray[i]]; 4034 } 4035 ierr = PetscFree(larray);CHKERRQ(ierr); 4036 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4037 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4038 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 4039 } else { 4040 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4041 } 4042 4043 PetscFunctionReturn(0); 4044 } 4045 4046 #if defined(PETSC_HAVE_ADIC) 4047 #undef __FUNCT__ 4048 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4049 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4050 { 4051 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4052 PetscErrorCode ierr; 4053 4054 PetscFunctionBegin; 4055 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4056 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4057 PetscFunctionReturn(0); 4058 } 4059 #endif 4060 4061 #undef __FUNCT__ 4062 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4063 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4064 { 4065 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4066 PetscErrorCode ierr; 4067 4068 PetscFunctionBegin; 4069 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4070 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4071 PetscFunctionReturn(0); 4072 } 4073 4074 #undef __FUNCT__ 4075 #define __FUNCT__ "MatMerge" 4076 /*@ 4077 MatMerge - Creates a single large PETSc matrix by concatinating sequential 4078 matrices from each processor 4079 4080 Collective on MPI_Comm 4081 
4082 Input Parameters: 4083 + comm - the communicators the parallel matrix will live on 4084 . inmat - the input sequential matrices 4085 . n - number of local columns (or PETSC_DECIDE) 4086 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4087 4088 Output Parameter: 4089 . outmat - the parallel matrix generated 4090 4091 Level: advanced 4092 4093 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4094 4095 @*/ 4096 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4097 { 4098 PetscErrorCode ierr; 4099 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 4100 PetscInt *indx; 4101 PetscScalar *values; 4102 4103 PetscFunctionBegin; 4104 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4105 if (scall == MAT_INITIAL_MATRIX){ 4106 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 4107 if (n == PETSC_DECIDE){ 4108 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4109 } 4110 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4111 rstart -= m; 4112 4113 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4114 for (i=0;i<m;i++) { 4115 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4116 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4117 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4118 } 4119 /* This routine will ONLY return MPIAIJ type matrix */ 4120 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4121 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4122 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4123 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4124 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4125 4126 } else if (scall == MAT_REUSE_MATRIX){ 4127 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4128 } else { 4129 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid 
MatReuse %d",(int)scall); 4130 } 4131 4132 for (i=0;i<m;i++) { 4133 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4134 Ii = i + rstart; 4135 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4136 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4137 } 4138 ierr = MatDestroy(inmat);CHKERRQ(ierr); 4139 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4140 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4141 4142 PetscFunctionReturn(0); 4143 } 4144 4145 #undef __FUNCT__ 4146 #define __FUNCT__ "MatFileSplit" 4147 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4148 { 4149 PetscErrorCode ierr; 4150 PetscMPIInt rank; 4151 PetscInt m,N,i,rstart,nnz; 4152 size_t len; 4153 const PetscInt *indx; 4154 PetscViewer out; 4155 char *name; 4156 Mat B; 4157 const PetscScalar *values; 4158 4159 PetscFunctionBegin; 4160 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4161 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4162 /* Should this be the type of the diagonal block of A? 
*/ 4163 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4164 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4165 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4166 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4167 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4168 for (i=0;i<m;i++) { 4169 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4170 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4171 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4172 } 4173 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4174 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4175 4176 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4177 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4178 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4179 sprintf(name,"%s.%d",outfile,rank); 4180 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4181 ierr = PetscFree(name); 4182 ierr = MatView(B,out);CHKERRQ(ierr); 4183 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 4184 ierr = MatDestroy(B);CHKERRQ(ierr); 4185 PetscFunctionReturn(0); 4186 } 4187 4188 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 4189 #undef __FUNCT__ 4190 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4191 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4192 { 4193 PetscErrorCode ierr; 4194 Mat_Merge_SeqsToMPI *merge; 4195 PetscContainer container; 4196 4197 PetscFunctionBegin; 4198 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4199 if (container) { 4200 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4201 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4202 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4203 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4204 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4205 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4206 ierr = 
PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4207 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4208 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4209 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4210 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4211 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4212 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4213 ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr); 4214 4215 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 4216 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4217 } 4218 ierr = PetscFree(merge);CHKERRQ(ierr); 4219 4220 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4221 PetscFunctionReturn(0); 4222 } 4223 4224 #include "../src/mat/utils/freespace.h" 4225 #include "petscbt.h" 4226 4227 #undef __FUNCT__ 4228 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4229 /*@C 4230 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4231 matrices from each processor 4232 4233 Collective on MPI_Comm 4234 4235 Input Parameters: 4236 + comm - the communicators the parallel matrix will live on 4237 . seqmat - the input sequential matrices 4238 . m - number of local rows (or PETSC_DECIDE) 4239 . n - number of local columns (or PETSC_DECIDE) 4240 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4241 4242 Output Parameter: 4243 . mpimat - the parallel matrix generated 4244 4245 Level: advanced 4246 4247 Notes: 4248 The dimensions of the sequential matrix in each processor MUST be the same. 4249 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4250 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 
4251 @*/ 4252 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4253 { 4254 PetscErrorCode ierr; 4255 MPI_Comm comm=((PetscObject)mpimat)->comm; 4256 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4257 PetscMPIInt size,rank,taga,*len_s; 4258 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4259 PetscInt proc,m; 4260 PetscInt **buf_ri,**buf_rj; 4261 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4262 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4263 MPI_Request *s_waits,*r_waits; 4264 MPI_Status *status; 4265 MatScalar *aa=a->a; 4266 MatScalar **abuf_r,*ba_i; 4267 Mat_Merge_SeqsToMPI *merge; 4268 PetscContainer container; 4269 4270 PetscFunctionBegin; 4271 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4272 4273 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4274 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4275 4276 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4277 if (container) { 4278 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4279 } 4280 bi = merge->bi; 4281 bj = merge->bj; 4282 buf_ri = merge->buf_ri; 4283 buf_rj = merge->buf_rj; 4284 4285 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4286 owners = merge->rowmap->range; 4287 len_s = merge->len_s; 4288 4289 /* send and recv matrix values */ 4290 /*-----------------------------*/ 4291 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4292 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4293 4294 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4295 for (proc=0,k=0; proc<size; proc++){ 4296 if (!len_s[proc]) continue; 4297 i = owners[proc]; 4298 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4299 k++; 4300 } 4301 4302 if (merge->nrecv) {ierr = 
MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4303 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4304 ierr = PetscFree(status);CHKERRQ(ierr); 4305 4306 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4307 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4308 4309 /* insert mat values of mpimat */ 4310 /*----------------------------*/ 4311 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4312 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4313 4314 for (k=0; k<merge->nrecv; k++){ 4315 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4316 nrows = *(buf_ri_k[k]); 4317 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4318 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4319 } 4320 4321 /* set values of ba */ 4322 m = merge->rowmap->n; 4323 for (i=0; i<m; i++) { 4324 arow = owners[rank] + i; 4325 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4326 bnzi = bi[i+1] - bi[i]; 4327 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4328 4329 /* add local non-zero vals of this proc's seqmat into ba */ 4330 anzi = ai[arow+1] - ai[arow]; 4331 aj = a->j + ai[arow]; 4332 aa = a->a + ai[arow]; 4333 nextaj = 0; 4334 for (j=0; nextaj<anzi; j++){ 4335 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4336 ba_i[j] += aa[nextaj++]; 4337 } 4338 } 4339 4340 /* add received vals into ba */ 4341 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4342 /* i-th row */ 4343 if (i == *nextrow[k]) { 4344 anzi = *(nextai[k]+1) - *nextai[k]; 4345 aj = buf_rj[k] + *(nextai[k]); 4346 aa = abuf_r[k] + *(nextai[k]); 4347 nextaj = 0; 4348 for (j=0; nextaj<anzi; j++){ 4349 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4350 ba_i[j] += aa[nextaj++]; 4351 } 4352 } 4353 nextrow[k]++; nextai[k]++; 4354 } 4355 } 4356 ierr = 
MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4357 } 4358 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4359 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4360 4361 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4362 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4363 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4364 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4365 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4366 PetscFunctionReturn(0); 4367 } 4368 4369 #undef __FUNCT__ 4370 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4371 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4372 { 4373 PetscErrorCode ierr; 4374 Mat B_mpi; 4375 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4376 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4377 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4378 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4379 PetscInt len,proc,*dnz,*onz; 4380 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4381 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4382 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4383 MPI_Status *status; 4384 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4385 PetscBT lnkbt; 4386 Mat_Merge_SeqsToMPI *merge; 4387 PetscContainer container; 4388 4389 PetscFunctionBegin; 4390 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4391 4392 /* make sure it is a PETSc comm */ 4393 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4394 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4395 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4396 4397 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4398 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4399 4400 /* determine row ownership */ 4401 /*---------------------------------------------------------*/ 
4402 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4403 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4404 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4405 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4406 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4407 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4408 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4409 4410 m = merge->rowmap->n; 4411 M = merge->rowmap->N; 4412 owners = merge->rowmap->range; 4413 4414 /* determine the number of messages to send, their lengths */ 4415 /*---------------------------------------------------------*/ 4416 len_s = merge->len_s; 4417 4418 len = 0; /* length of buf_si[] */ 4419 merge->nsend = 0; 4420 for (proc=0; proc<size; proc++){ 4421 len_si[proc] = 0; 4422 if (proc == rank){ 4423 len_s[proc] = 0; 4424 } else { 4425 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4426 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4427 } 4428 if (len_s[proc]) { 4429 merge->nsend++; 4430 nrows = 0; 4431 for (i=owners[proc]; i<owners[proc+1]; i++){ 4432 if (ai[i+1] > ai[i]) nrows++; 4433 } 4434 len_si[proc] = 2*(nrows+1); 4435 len += len_si[proc]; 4436 } 4437 } 4438 4439 /* determine the number and length of messages to receive for ij-structure */ 4440 /*-------------------------------------------------------------------------*/ 4441 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4442 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4443 4444 /* post the Irecv of j-structure */ 4445 /*-------------------------------*/ 4446 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4447 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4448 4449 /* post the Isend of 
j-structure */ 4450 /*--------------------------------*/ 4451 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4452 4453 for (proc=0, k=0; proc<size; proc++){ 4454 if (!len_s[proc]) continue; 4455 i = owners[proc]; 4456 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4457 k++; 4458 } 4459 4460 /* receives and sends of j-structure are complete */ 4461 /*------------------------------------------------*/ 4462 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4463 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4464 4465 /* send and recv i-structure */ 4466 /*---------------------------*/ 4467 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4468 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4469 4470 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4471 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4472 for (proc=0,k=0; proc<size; proc++){ 4473 if (!len_s[proc]) continue; 4474 /* form outgoing message for i-structure: 4475 buf_si[0]: nrows to be sent 4476 [1:nrows]: row index (global) 4477 [nrows+1:2*nrows+1]: i-structure index 4478 */ 4479 /*-------------------------------------------*/ 4480 nrows = len_si[proc]/2 - 1; 4481 buf_si_i = buf_si + nrows+1; 4482 buf_si[0] = nrows; 4483 buf_si_i[0] = 0; 4484 nrows = 0; 4485 for (i=owners[proc]; i<owners[proc+1]; i++){ 4486 anzi = ai[i+1] - ai[i]; 4487 if (anzi) { 4488 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4489 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4490 nrows++; 4491 } 4492 } 4493 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4494 k++; 4495 buf_si += len_si[proc]; 4496 } 4497 4498 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4499 if 
(merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4500 4501 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4502 for (i=0; i<merge->nrecv; i++){ 4503 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4504 } 4505 4506 ierr = PetscFree(len_si);CHKERRQ(ierr); 4507 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4508 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4509 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4510 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4511 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4512 ierr = PetscFree(status);CHKERRQ(ierr); 4513 4514 /* compute a local seq matrix in each processor */ 4515 /*----------------------------------------------*/ 4516 /* allocate bi array and free space for accumulating nonzero column info */ 4517 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4518 bi[0] = 0; 4519 4520 /* create and initialize a linked list */ 4521 nlnk = N+1; 4522 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4523 4524 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4525 len = 0; 4526 len = ai[owners[rank+1]] - ai[owners[rank]]; 4527 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4528 current_space = free_space; 4529 4530 /* determine symbolic info for each local row */ 4531 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4532 4533 for (k=0; k<merge->nrecv; k++){ 4534 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4535 nrows = *buf_ri_k[k]; 4536 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4537 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4538 } 4539 4540 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4541 len = 0; 4542 for (i=0;i<m;i++) { 4543 
bnzi = 0; 4544 /* add local non-zero cols of this proc's seqmat into lnk */ 4545 arow = owners[rank] + i; 4546 anzi = ai[arow+1] - ai[arow]; 4547 aj = a->j + ai[arow]; 4548 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4549 bnzi += nlnk; 4550 /* add received col data into lnk */ 4551 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4552 if (i == *nextrow[k]) { /* i-th row */ 4553 anzi = *(nextai[k]+1) - *nextai[k]; 4554 aj = buf_rj[k] + *nextai[k]; 4555 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4556 bnzi += nlnk; 4557 nextrow[k]++; nextai[k]++; 4558 } 4559 } 4560 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4561 4562 /* if free space is not available, make more free space */ 4563 if (current_space->local_remaining<bnzi) { 4564 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4565 nspacedouble++; 4566 } 4567 /* copy data into free space, then initialize lnk */ 4568 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4569 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4570 4571 current_space->array += bnzi; 4572 current_space->local_used += bnzi; 4573 current_space->local_remaining -= bnzi; 4574 4575 bi[i+1] = bi[i] + bnzi; 4576 } 4577 4578 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4579 4580 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4581 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4582 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4583 4584 /* create symbolic parallel matrix B_mpi */ 4585 /*---------------------------------------*/ 4586 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4587 if (n==PETSC_DECIDE) { 4588 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4589 } else { 4590 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4591 } 4592 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4593 ierr = 
MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  /* record the computed structure and communication plan for the numeric phase */
  merge->bi        = bi;
  merge->bj        = bj;
  merge->buf_ri    = buf_ri;
  merge->buf_rj    = buf_rj;
  merge->coi       = PETSC_NULL;
  merge->coj       = PETSC_NULL;
  merge->owners_co = PETSC_NULL;

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  /* balances the PetscCommDuplicate() done at entry */
  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPI"
/*
   Driver: symbolic phase only on MAT_INITIAL_MATRIX (builds structure and
   preallocates), numeric phase always (fills in the values).
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMat"
/*@
     MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.
A_loc - the local sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
  PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
  MatScalar      *aa=a->a,*ba=b->a,*cam;
  PetscScalar    *ca;
  PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    /* row i of the merged matrix has the diagonal-block and off-diagonal-block
       nonzeros of row i combined */
    ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++){
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
    k = 0;
    /* NOTE: aj/aa and bj/ba are walked cumulatively across the whole loop,
       so the three inner loops below must stay in this exact order to keep
       global column indices sorted: off-diag cols < cstart, then diagonal
       block, then off-diag cols >= cstart (cmap maps local to global) */
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k] = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k] = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        cj[k] = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX){
    /* structure already exists; only refresh the numerical values in order */
    mat=(Mat_SeqAIJ*)(*A_loc)->data;
    ci = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMatCondensed"
/*@C
     MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or PETSC_NULL)

   Output Parameter:
.
A_loc - the local sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row){
    /* default: all locally owned rows */
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col){
    /* default: columns with a nonzero on this process, in ascending global
       order: off-diagonal cols below cstart, diagonal block, then the rest */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX){
    /* MatGetSubMatrices() expects a user-supplied Mat array on reuse */
    ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr = PetscFree(aloc);CHKERRQ(ierr);
  /* only destroy the index sets this routine created itself */
  if (!row){
    ierr = ISDestroy(isrowa);CHKERRQ(ierr);
  }
  if (!col){
    ierr = ISDestroy(iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)

   Output Parameter:
+    rowb, colb - index sets of rows and columns of B to extract
.    brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq;

  PetscFunctionBegin;
  /* A's column layout must match B's row layout for the product to make sense */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    /* rows of B = columns of A with a local nonzero, in ascending global order */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
    *brstart = imark;
    /* take all columns of B */
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    /* MatGetSubMatrices() expects a user-supplied Mat array on reuse */
    ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr = PetscFree(bseq);CHKERRQ(ierr);
  /* hand the index sets back to the caller, or clean up our own */
  if (!rowb){
    ierr = ISDestroy(isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb){
    ierr = ISDestroy(iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols"
/*@C
    MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
.    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
.
startsj_r - similar to startsj for receives 4887 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4888 4889 Output Parameter: 4890 + B_oth - the sequential matrix generated 4891 4892 Level: developer 4893 4894 @*/ 4895 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 4896 { 4897 VecScatter_MPI_General *gen_to,*gen_from; 4898 PetscErrorCode ierr; 4899 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4900 Mat_SeqAIJ *b_oth; 4901 VecScatter ctx=a->Mvctx; 4902 MPI_Comm comm=((PetscObject)ctx)->comm; 4903 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4904 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4905 PetscScalar *rvalues,*svalues; 4906 MatScalar *b_otha,*bufa,*bufA; 4907 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4908 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4909 MPI_Status *sstatus,rstatus; 4910 PetscMPIInt jj; 4911 PetscInt *cols,sbs,rbs; 4912 PetscScalar *vals; 4913 4914 PetscFunctionBegin; 4915 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4916 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4917 } 4918 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4919 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4920 4921 gen_to = (VecScatter_MPI_General*)ctx->todata; 4922 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4923 rvalues = gen_from->values; /* holds the length of receiving row */ 4924 svalues = gen_to->values; /* holds the length of sending row */ 4925 nrecvs = gen_from->n; 4926 nsends = gen_to->n; 4927 4928 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4929 srow = gen_to->indices; /* local row index to be sent */ 
4930 sstarts = gen_to->starts; 4931 sprocs = gen_to->procs; 4932 sstatus = gen_to->sstatus; 4933 sbs = gen_to->bs; 4934 rstarts = gen_from->starts; 4935 rprocs = gen_from->procs; 4936 rbs = gen_from->bs; 4937 4938 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4939 if (scall == MAT_INITIAL_MATRIX){ 4940 /* i-array */ 4941 /*---------*/ 4942 /* post receives */ 4943 for (i=0; i<nrecvs; i++){ 4944 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4945 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4946 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4947 } 4948 4949 /* pack the outgoing message */ 4950 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 4951 sstartsj[0] = 0; rstartsj[0] = 0; 4952 len = 0; /* total length of j or a array to be sent */ 4953 k = 0; 4954 for (i=0; i<nsends; i++){ 4955 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4956 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4957 for (j=0; j<nrows; j++) { 4958 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4959 for (l=0; l<sbs; l++){ 4960 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4961 rowlen[j*sbs+l] = ncols; 4962 len += ncols; 4963 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4964 } 4965 k++; 4966 } 4967 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4968 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4969 } 4970 /* recvs and sends of i-array are completed */ 4971 i = nrecvs; 4972 while (i--) { 4973 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4974 } 4975 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4976 4977 /* allocate buffers for sending j and a arrays */ 4978 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4979 ierr = 
PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4980 4981 /* create i-array of B_oth */ 4982 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4983 b_othi[0] = 0; 4984 len = 0; /* total length of j or a array to be received */ 4985 k = 0; 4986 for (i=0; i<nrecvs; i++){ 4987 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4988 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 4989 for (j=0; j<nrows; j++) { 4990 b_othi[k+1] = b_othi[k] + rowlen[j]; 4991 len += rowlen[j]; k++; 4992 } 4993 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4994 } 4995 4996 /* allocate space for j and a arrrays of B_oth */ 4997 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4998 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 4999 5000 /* j-array */ 5001 /*---------*/ 5002 /* post receives of j-array */ 5003 for (i=0; i<nrecvs; i++){ 5004 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5005 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5006 } 5007 5008 /* pack the outgoing message j-array */ 5009 k = 0; 5010 for (i=0; i<nsends; i++){ 5011 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5012 bufJ = bufj+sstartsj[i]; 5013 for (j=0; j<nrows; j++) { 5014 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5015 for (ll=0; ll<sbs; ll++){ 5016 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5017 for (l=0; l<ncols; l++){ 5018 *bufJ++ = cols[l]; 5019 } 5020 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5021 } 5022 } 5023 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5024 } 5025 5026 /* recvs and sends of j-array are completed */ 5027 i = nrecvs; 5028 while (i--) { 5029 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5030 } 5031 if 
(nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5032 } else if (scall == MAT_REUSE_MATRIX){ 5033 sstartsj = *startsj; 5034 rstartsj = *startsj_r; 5035 bufa = *bufa_ptr; 5036 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5037 b_otha = b_oth->a; 5038 } else { 5039 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5040 } 5041 5042 /* a-array */ 5043 /*---------*/ 5044 /* post receives of a-array */ 5045 for (i=0; i<nrecvs; i++){ 5046 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5047 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5048 } 5049 5050 /* pack the outgoing message a-array */ 5051 k = 0; 5052 for (i=0; i<nsends; i++){ 5053 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5054 bufA = bufa+sstartsj[i]; 5055 for (j=0; j<nrows; j++) { 5056 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5057 for (ll=0; ll<sbs; ll++){ 5058 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5059 for (l=0; l<ncols; l++){ 5060 *bufA++ = vals[l]; 5061 } 5062 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5063 } 5064 } 5065 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5066 } 5067 /* recvs and sends of a-array are completed */ 5068 i = nrecvs; 5069 while (i--) { 5070 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5071 } 5072 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5073 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5074 5075 if (scall == MAT_INITIAL_MATRIX){ 5076 /* put together the new matrix */ 5077 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5078 5079 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. 
*/ 5080 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5081 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5082 b_oth->free_a = PETSC_TRUE; 5083 b_oth->free_ij = PETSC_TRUE; 5084 b_oth->nonew = 0; 5085 5086 ierr = PetscFree(bufj);CHKERRQ(ierr); 5087 if (!startsj || !bufa_ptr){ 5088 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5089 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5090 } else { 5091 *startsj = sstartsj; 5092 *startsj_r = rstartsj; 5093 *bufa_ptr = bufa; 5094 } 5095 } 5096 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5097 PetscFunctionReturn(0); 5098 } 5099 5100 #undef __FUNCT__ 5101 #define __FUNCT__ "MatGetCommunicationStructs" 5102 /*@C 5103 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5104 5105 Not Collective 5106 5107 Input Parameters: 5108 . A - The matrix in mpiaij format 5109 5110 Output Parameter: 5111 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5112 . 
colmap - A map from global column index to local index into lvec
-  multScatter - A scatter from the argument of a matrix-vector product to lvec

   Level: developer

@*/
#if defined (PETSC_USE_CTABLE)
PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ *) A->data;
  /* Hand back pointers to the internal communication objects; no copies are
     made, so the caller must not destroy them.
     NOTE(review): the if-guards below can never be false once the
     PetscValidPointer() checks above have passed; either the guards or the
     validation is redundant -- confirm whether the outputs were meant to be
     optional (NULL allowed). */
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}

/* Conversion routines defined in sibling implementation files */
EXTERN_C_BEGIN
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
/*
    Computes (B'*A')' since computing B*A directly is untenable

               n                       p                          p
        (              )       (             )         (                 )
      m (      A       )  *  n (       B     )   =   m (         C       )
        (              )       (             )         (                 )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;  /* explicit transposes of A, B, and of their product */

  PetscFunctionBegin;
  /* C = A*B is computed as (B'*A')': transpose both operands, multiply in the
     reversed order, then transpose the result back into the caller's C. */
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(At);CHKERRQ(ierr);
  ierr = MatDestroy(Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
/* Symbolic phase: creates and assembles an empty MPIDENSE product matrix of the
   correct local/global sizes; no numerical work is done here. */
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;  /* local row/column sizes of the product */
  Mat            Cmat;

  PetscFunctionBegin;
  /* inner dimensions must agree: cols(A) == rows(B) locally */
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
  ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C = Cmat;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
/* Driver: on MAT_INITIAL_MATRIX create the product matrix symbolically, then
   (re)compute its numerical values. */
PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Prototypes for external-package factorizations; each is compiled in only
   when the corresponding package is available. */
EXTERN_C_BEGIN
#if defined(PETSC_HAVE_MUMPS)
extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_PASTIX)
extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
#endif
#if
defined(PETSC_HAVE_SPOOLES)
extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
#endif
EXTERN_C_END

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJ()
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
/* Type constructor for MATMPIAIJ: allocates the Mat_MPIAIJ data structure,
   installs the operations table, and registers the type-specific routines
   under their well-known string names for dynamic dispatch. */
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);

  ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->rmap->bs  = 1;
  B->assembled = PETSC_FALSE;
  B->mapping   = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* Register type-specific implementations as (query-name, symbol-name,
     function-pointer) triples; external-package factorizations are only
     registered when the package was configured in. */
#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
                                     "MatGetFactor_mpiaij_spooles",
                                     MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
                                     "MatGetFactor_aij_mumps",
                                     MatGetFactor_aij_mumps);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_PASTIX)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
                                     "MatGetFactor_mpiaij_pastix",
                                     MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
                                     "MatGetFactor_mpiaij_superlu_dist",
                                     MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIAIJ",
                                     MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIAIJ",
                                     MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIAIJ",
                                     MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                     "MatIsTranspose_MPIAIJ",
                                     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                     "MatMPIAIJSetPreallocation_MPIAIJ",
                                     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
                                     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
                                     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                     "MatDiagonalScaleLocal_MPIAIJ",
                                     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
                                     "MatConvert_MPIAIJ_MPICSRPERM",
                                     MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
                                     "MatConvert_MPIAIJ_MPICRL",
                                     MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
                                     "MatConvert_MPIAIJ_MPISBAIJ",
                                     MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
                                     "MatMatMult_MPIDense_MPIAIJ",
                                     MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
                                     "MatMatMultSymbolic_MPIDense_MPIAIJ",
                                     MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
                                     "MatMatMultNumeric_MPIDense_MPIAIJ",
                                     MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@
   MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
   and "off-diagonal" part of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices for "diagonal" portion of matrix
.  j - column indices
.  a - matrix values
.
oi - row indices for "off-diagonal" portion of matrix 5355 . oj - column indices 5356 - oa - matrix values 5357 5358 Output Parameter: 5359 . mat - the matrix 5360 5361 Level: advanced 5362 5363 Notes: 5364 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 5365 5366 The i and j indices are 0 based 5367 5368 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5369 5370 This sets local rows and cannot be used to set off-processor values. 5371 5372 You cannot later use MatSetValues() to change values in this matrix. 5373 5374 .keywords: matrix, aij, compressed row, sparse, parallel 5375 5376 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5377 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5378 @*/ 5379 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5380 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5381 { 5382 PetscErrorCode ierr; 5383 Mat_MPIAIJ *maij; 5384 5385 PetscFunctionBegin; 5386 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5387 if (i[0]) { 5388 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5389 } 5390 if (oi[0]) { 5391 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5392 } 5393 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5394 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5395 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5396 maij = (Mat_MPIAIJ*) (*mat)->data; 5397 maij->donotstash = PETSC_TRUE; 5398 (*mat)->preallocated = PETSC_TRUE; 5399 5400 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5401 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5402 ierr = 
PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5403 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5404 5405 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5406 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5407 5408 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5409 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5410 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5411 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5412 5413 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5414 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5415 PetscFunctionReturn(0); 5416 } 5417 5418 /* 5419 Special version for direct calls from Fortran 5420 */ 5421 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5422 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5423 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5424 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5425 #endif 5426 5427 /* Change these macros so can be used in void function */ 5428 #undef CHKERRQ 5429 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5430 #undef SETERRQ2 5431 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5432 #undef SETERRQ 5433 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5434 5435 EXTERN_C_BEGIN 5436 #undef __FUNCT__ 5437 #define __FUNCT__ "matsetvaluesmpiaij_" 5438 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5439 { 5440 Mat mat = *mmat; 5441 PetscInt m = *mm, n = *mn; 5442 InsertMode addv = *maddv; 5443 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5444 PetscScalar value; 5445 PetscErrorCode ierr; 5446 5447 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5448 if (mat->insertmode == NOT_SET_VALUES) { 5449 mat->insertmode = addv; 5450 } 5451 #if defined(PETSC_USE_DEBUG) 5452 else 
if (mat->insertmode != addv) { 5453 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5454 } 5455 #endif 5456 { 5457 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5458 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5459 PetscTruth roworiented = aij->roworiented; 5460 5461 /* Some Variables required in the macro */ 5462 Mat A = aij->A; 5463 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5464 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5465 MatScalar *aa = a->a; 5466 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5467 Mat B = aij->B; 5468 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5469 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5470 MatScalar *ba = b->a; 5471 5472 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5473 PetscInt nonew = a->nonew; 5474 MatScalar *ap1,*ap2; 5475 5476 PetscFunctionBegin; 5477 for (i=0; i<m; i++) { 5478 if (im[i] < 0) continue; 5479 #if defined(PETSC_USE_DEBUG) 5480 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5481 #endif 5482 if (im[i] >= rstart && im[i] < rend) { 5483 row = im[i] - rstart; 5484 lastcol1 = -1; 5485 rp1 = aj + ai[row]; 5486 ap1 = aa + ai[row]; 5487 rmax1 = aimax[row]; 5488 nrow1 = ailen[row]; 5489 low1 = 0; 5490 high1 = nrow1; 5491 lastcol2 = -1; 5492 rp2 = bj + bi[row]; 5493 ap2 = ba + bi[row]; 5494 rmax2 = bimax[row]; 5495 nrow2 = bilen[row]; 5496 low2 = 0; 5497 high2 = nrow2; 5498 5499 for (j=0; j<n; j++) { 5500 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5501 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5502 if (in[j] >= cstart && in[j] < cend){ 5503 col = in[j] - cstart; 5504 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5505 } else if 
(in[j] < 0) continue; 5506 #if defined(PETSC_USE_DEBUG) 5507 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5508 #endif 5509 else { 5510 if (mat->was_assembled) { 5511 if (!aij->colmap) { 5512 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5513 } 5514 #if defined (PETSC_USE_CTABLE) 5515 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5516 col--; 5517 #else 5518 col = aij->colmap[in[j]] - 1; 5519 #endif 5520 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5521 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5522 col = in[j]; 5523 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5524 B = aij->B; 5525 b = (Mat_SeqAIJ*)B->data; 5526 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5527 rp2 = bj + bi[row]; 5528 ap2 = ba + bi[row]; 5529 rmax2 = bimax[row]; 5530 nrow2 = bilen[row]; 5531 low2 = 0; 5532 high2 = nrow2; 5533 bm = aij->B->rmap->n; 5534 ba = b->a; 5535 } 5536 } else col = in[j]; 5537 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5538 } 5539 } 5540 } else { 5541 if (!aij->donotstash) { 5542 if (roworiented) { 5543 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5544 } else { 5545 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5546 } 5547 } 5548 } 5549 }} 5550 PetscFunctionReturnVoid(); 5551 } 5552 EXTERN_C_END 5553 5554