1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 4 #include "petscblaslapack.h" 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "MatDistribute_MPIAIJ" 8 /* 9 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 10 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 11 12 Only for square matrices 13 */ 14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 15 { 16 PetscMPIInt rank,size; 17 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 18 PetscErrorCode ierr; 19 Mat mat; 20 Mat_SeqAIJ *gmata; 21 PetscMPIInt tag; 22 MPI_Status status; 23 PetscTruth aij; 24 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 25 26 PetscFunctionBegin; 27 CHKMEMQ; 28 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 29 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 30 if (!rank) { 31 ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 32 if (!aij) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 33 } 34 if (reuse == MAT_INITIAL_MATRIX) { 35 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 36 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 37 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 38 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 39 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 40 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 41 rowners[0] = 0; 42 for (i=2; i<=size; i++) { 43 rowners[i] += rowners[i-1]; 44 } 45 rstart = rowners[rank]; 46 rend = rowners[rank+1]; 47 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 48 if (!rank) { 49 gmata = (Mat_SeqAIJ*) gmat->data; 50 /* send row lengths to all processors */ 51 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 52 for (i=1; i<size; i++) { 53 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 54 } 55 /* determine number diagonal and off-diagonal counts */ 56 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 57 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 58 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 59 jj = 0; 60 for (i=0; i<m; i++) { 61 for (j=0; j<dlens[i]; j++) { 62 if (gmata->j[jj] < rstart) ld[i]++; 63 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 64 jj++; 65 } 66 } 67 /* send column indices to other processes */ 68 for (i=1; i<size; i++) { 69 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 70 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 71 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 72 } 73 74 /* send numerical values to other processes */ 75 for (i=1; i<size; i++) { 76 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 77 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 78 } 79 gmataa = gmata->a; 80 gmataj = gmata->j; 81 82 } else { 83 /* receive row lengths */ 84 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 85 /* receive column indices */ 86 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 87 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 88 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 89 /* determine number diagonal and off-diagonal counts */ 90 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 91 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 92 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 93 jj = 0; 94 for (i=0; i<m; i++) { 95 for (j=0; j<dlens[i]; j++) { 96 if (gmataj[jj] < rstart) ld[i]++; 97 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 98 jj++; 99 } 100 } 101 /* receive numerical values */ 102 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 104 } 105 /* set preallocation */ 106 for (i=0; i<m; i++) { 107 dlens[i] -= olens[i]; 108 } 109 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 110 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 111 112 for (i=0; i<m; i++) { 113 dlens[i] += olens[i]; 114 } 115 cnt = 0; 116 for (i=0; i<m; i++) { 117 row = rstart + i; 118 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 119 cnt += dlens[i]; 120 } 121 if (rank) { 122 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 123 } 124 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 125 ierr = PetscFree(rowners);CHKERRQ(ierr); 126 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 127 *inmat = mat; 128 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 129 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 130 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 131 mat = *inmat; 132 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 133 if (!rank) { 134 /* send numerical values to other processes */ 135 gmata = (Mat_SeqAIJ*) gmat->data; 136 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 137 gmataa = gmata->a; 138 for (i=1; i<size; i++) { 139 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 140 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 141 } 142 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 143 } else { 144 /* receive numerical values from process 0*/ 145 nz = Ad->nz + Ao->nz; 146 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 147 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 148 } 149 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 150 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 151 ad = Ad->a; 152 ao = Ao->a; 153 if (mat->rmap->n) { 154 i = 0; 155 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 156 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 157 } 158 for (i=1; i<mat->rmap->n; i++) { 159 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 160 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 161 } 162 i--; 163 if (mat->rmap->n) { 164 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 165 } 166 if (rank) { 167 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 168 } 169 } 170 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 171 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 172 CHKMEMQ; 173 PetscFunctionReturn(0); 174 } 175 176 /* 177 Local utility routine that creates a mapping from the global column 178 number to the local number in the off-diagonal part of the local 179 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 180 a slightly higher hash table cost; without it it is not scalable (each processor 181 has an order N integer array but is fast to acess. 182 */ 183 #undef __FUNCT__ 184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 186 { 187 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 188 PetscErrorCode ierr; 189 PetscInt n = aij->B->cmap->n,i; 190 191 PetscFunctionBegin; 192 #if defined (PETSC_USE_CTABLE) 193 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 194 for (i=0; i<n; i++){ 195 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 196 } 197 #else 198 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 199 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 200 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 201 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 202 #endif 203 PetscFunctionReturn(0); 204 } 205 206 207 #define CHUNKSIZE 15 208 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 209 { \ 210 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 211 lastcol1 = col;\ 212 while (high1-low1 > 5) { \ 213 t = (low1+high1)/2; \ 214 if (rp1[t] > col) high1 = t; \ 215 else low1 = t; \ 216 } \ 217 for (_i=low1; _i<high1; _i++) { \ 218 if (rp1[_i] > col) break; \ 219 if (rp1[_i] == col) { \ 220 if (addv == ADD_VALUES) ap1[_i] += value; \ 221 else ap1[_i] = value; \ 222 goto a_noinsert; \ 223 } \ 224 } \ 225 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 226 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 227 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 228 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 229 N = nrow1++ - 1; a->nz++; high1++; \ 230 /* shift up all the later entries in this row */ \ 231 for (ii=N; ii>=_i; ii--) { \ 232 rp1[ii+1] = rp1[ii]; \ 233 ap1[ii+1] = ap1[ii]; \ 234 } \ 235 rp1[_i] = col; \ 236 ap1[_i] = value; \ 237 a_noinsert: ; \ 238 ailen[row] = nrow1; \ 239 } 240 241 242 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 243 { \ 244 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 245 lastcol2 = col;\ 246 while (high2-low2 > 5) { \ 247 t = (low2+high2)/2; \ 248 if (rp2[t] > col) high2 = t; \ 249 else low2 = t; \ 250 } \ 251 for (_i=low2; _i<high2; _i++) { \ 252 if (rp2[_i] > col) break; \ 253 if (rp2[_i] == col) { \ 254 if (addv == ADD_VALUES) ap2[_i] += value; \ 255 else ap2[_i] = value; \ 256 goto b_noinsert; \ 257 } \ 258 } \ 259 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 260 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 261 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 262 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 263 N = nrow2++ - 1; b->nz++; high2++; \ 264 /* shift up all the later entries in this row */ \ 265 for (ii=N; ii>=_i; ii--) { \ 266 rp2[ii+1] = rp2[ii]; \ 267 ap2[ii+1] = ap2[ii]; \ 268 } \ 269 rp2[_i] = col; \ 270 ap2[_i] = value; \ 271 b_noinsert: ; \ 272 bilen[row] = nrow2; \ 273 } 274 275 #undef __FUNCT__ 276 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 277 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 278 { 279 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 280 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 281 PetscErrorCode ierr; 282 PetscInt l,*garray = mat->garray,diag; 283 284 PetscFunctionBegin; 285 /* code only works for square matrices A */ 286 287 /* find size of row to the left of the diagonal part */ 288 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 289 row = row - diag; 290 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 291 if (garray[b->j[b->i[row]+l]] > diag) break; 292 } 293 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 294 295 /* diagonal part */ 296 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 297 298 /* right of diagonal part */ 299 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 300 PetscFunctionReturn(0); 301 } 302 303 #undef __FUNCT__ 304 #define __FUNCT__ "MatSetValues_MPIAIJ" 305 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 306 { 307 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 308 PetscScalar value; 309 PetscErrorCode ierr; 310 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 311 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 312 PetscTruth roworiented = aij->roworiented; 313 314 /* Some Variables required in the macro */ 315 Mat A = aij->A; 316 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 317 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 318 MatScalar *aa = a->a; 319 PetscTruth ignorezeroentries = a->ignorezeroentries; 320 Mat B = aij->B; 321 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 322 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 323 MatScalar *ba = b->a; 324 325 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 326 PetscInt nonew = a->nonew; 327 MatScalar *ap1,*ap2; 328 329 PetscFunctionBegin; 330 if (v) PetscValidScalarPointer(v,6); 331 for (i=0; i<m; i++) { 332 if (im[i] < 0) continue; 333 #if defined(PETSC_USE_DEBUG) 334 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 335 #endif 336 if (im[i] >= rstart && im[i] < rend) { 337 row = im[i] - rstart; 338 lastcol1 = -1; 339 rp1 = aj + ai[row]; 340 ap1 = aa + ai[row]; 341 rmax1 = aimax[row]; 342 nrow1 = ailen[row]; 343 low1 = 0; 344 high1 = nrow1; 345 lastcol2 = -1; 346 rp2 = bj + bi[row]; 347 ap2 = ba + bi[row]; 348 rmax2 = bimax[row]; 349 nrow2 = bilen[row]; 350 low2 = 0; 351 high2 = nrow2; 352 353 for (j=0; j<n; j++) { 354 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 355 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 356 if (in[j] >= cstart && in[j] < cend){ 357 col = in[j] - cstart; 358 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 359 } else if (in[j] < 0) continue; 360 #if defined(PETSC_USE_DEBUG) 361 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 362 #endif 363 else { 364 if (mat->was_assembled) { 365 if (!aij->colmap) { 366 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 367 } 368 #if defined (PETSC_USE_CTABLE) 369 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 370 col--; 371 #else 372 col = aij->colmap[in[j]] - 1; 373 #endif 374 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 375 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 376 col = in[j]; 377 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 378 B = aij->B; 379 b = (Mat_SeqAIJ*)B->data; 380 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 381 rp2 = bj + bi[row]; 382 ap2 = ba + bi[row]; 383 rmax2 = bimax[row]; 384 nrow2 = bilen[row]; 385 low2 = 0; 386 high2 = nrow2; 387 bm = aij->B->rmap->n; 388 ba = b->a; 389 } 390 } else col = in[j]; 391 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 392 } 393 } 394 } else { 395 if (!aij->donotstash) { 396 if (roworiented) { 397 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 398 } else { 399 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 400 } 401 } 402 } 403 } 404 PetscFunctionReturn(0); 405 } 406 407 #undef __FUNCT__ 408 #define __FUNCT__ "MatGetValues_MPIAIJ" 409 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 410 { 411 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 412 PetscErrorCode ierr; 413 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 414 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 415 416 PetscFunctionBegin; 417 for (i=0; i<m; i++) { 418 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 419 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 420 if (idxm[i] >= rstart && idxm[i] < rend) { 421 row = idxm[i] - rstart; 422 for (j=0; j<n; j++) { 423 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 424 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 425 if (idxn[j] >= cstart && idxn[j] < cend){ 426 col = idxn[j] - cstart; 427 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 428 } else { 429 if (!aij->colmap) { 430 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 431 } 432 #if defined (PETSC_USE_CTABLE) 433 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 434 col --; 435 #else 436 col = aij->colmap[idxn[j]] - 1; 437 #endif 438 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 439 else { 440 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 441 } 442 } 443 } 444 } else { 445 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 446 } 447 } 448 PetscFunctionReturn(0); 449 } 450 451 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 452 453 #undef __FUNCT__ 454 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 455 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 456 { 457 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 458 PetscErrorCode ierr; 459 PetscInt nstash,reallocs; 460 InsertMode addv; 461 462 PetscFunctionBegin; 463 if (aij->donotstash) { 464 PetscFunctionReturn(0); 465 } 466 467 /* make sure all processors are either in INSERTMODE or ADDMODE */ 468 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 469 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 470 mat->insertmode = addv; /* in case this processor had no cache */ 471 472 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 473 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 474 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 475 PetscFunctionReturn(0); 476 } 477 478 #undef __FUNCT__ 479 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 480 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 481 { 482 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 483 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 484 PetscErrorCode ierr; 485 PetscMPIInt n; 486 PetscInt i,j,rstart,ncols,flg; 487 PetscInt *row,*col; 488 PetscTruth other_disassembled; 489 PetscScalar *val; 490 InsertMode addv = mat->insertmode; 491 492 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 493 PetscFunctionBegin; 494 if (!aij->donotstash) { 495 while (1) { 496 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 497 if (!flg) break; 498 499 for (i=0; i<n;) { 500 /* Now identify the consecutive vals belonging to the same row */ 501 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 502 if (j < n) ncols = j-i; 503 else ncols = n-i; 504 /* Now assemble all these values with a single function call */ 505 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 506 i = j; 507 } 508 } 509 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 510 } 511 a->compressedrow.use = PETSC_FALSE; 512 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 513 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 514 515 /* determine if any processor has disassembled, if so we must 516 also disassemble ourselfs, in order that we may reassemble. */ 517 /* 518 if nonzero structure of submatrix B cannot change then we know that 519 no processor disassembled thus we can skip this stuff 520 */ 521 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 522 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 523 if (mat->was_assembled && !other_disassembled) { 524 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 525 } 526 } 527 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 528 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 529 } 530 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 531 ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 532 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 533 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 534 535 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 536 aij->rowvalues = 0; 537 538 /* used by MatAXPY() */ 539 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 540 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 541 542 if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;} 543 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 544 PetscFunctionReturn(0); 545 } 546 547 #undef __FUNCT__ 548 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 549 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 550 { 551 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 552 PetscErrorCode ierr; 553 554 PetscFunctionBegin; 555 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 556 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 557 PetscFunctionReturn(0); 558 } 559 560 #undef __FUNCT__ 561 #define __FUNCT__ "MatZeroRows_MPIAIJ" 562 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 563 { 564 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 565 PetscErrorCode ierr; 566 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 567 PetscInt i,*owners = A->rmap->range; 568 PetscInt *nprocs,j,idx,nsends,row; 569 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 570 PetscInt *rvalues,count,base,slen,*source; 571 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 572 MPI_Comm comm = ((PetscObject)A)->comm; 573 MPI_Request *send_waits,*recv_waits; 574 MPI_Status recv_status,*send_status; 575 #if defined(PETSC_DEBUG) 576 PetscTruth found = PETSC_FALSE; 577 #endif 578 579 PetscFunctionBegin; 580 /* first count number of contributors to each processor */ 581 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 582 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 583 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 584 j = 0; 585 for (i=0; i<N; i++) { 586 if (lastidx > (idx = rows[i])) j = 0; 587 lastidx = idx; 588 for (; j<size; j++) { 589 if (idx >= owners[j] && idx < owners[j+1]) { 590 nprocs[2*j]++; 591 nprocs[2*j+1] = 1; 592 owner[i] = j; 593 #if defined(PETSC_DEBUG) 594 found = PETSC_TRUE; 595 #endif 596 break; 597 } 598 } 599 #if defined(PETSC_DEBUG) 600 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 601 found = PETSC_FALSE; 602 #endif 603 } 604 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 605 606 /* inform other processors of number of messages and max length*/ 607 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 608 609 /* post receives: */ 610 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 611 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 612 for (i=0; i<nrecvs; i++) { 613 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 614 } 615 616 /* do sends: 617 1) starts[i] gives the starting index in svalues for stuff going to 618 the ith processor 619 */ 620 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 621 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 622 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 623 starts[0] = 0; 624 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 625 for (i=0; i<N; i++) { 626 svalues[starts[owner[i]]++] = rows[i]; 627 } 628 629 starts[0] = 0; 630 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 631 count = 0; 632 for (i=0; i<size; i++) { 633 if (nprocs[2*i+1]) { 634 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 635 } 636 } 637 ierr = PetscFree(starts);CHKERRQ(ierr); 638 639 base = owners[rank]; 640 641 /* wait on receives */ 642 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 643 count = nrecvs; slen = 0; 644 while (count) { 645 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 646 /* unpack receives into our local space */ 647 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 648 source[imdex] = recv_status.MPI_SOURCE; 649 lens[imdex] = n; 650 slen += n; 651 count--; 652 } 653 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 654 655 /* move the data into the send scatter */ 656 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 657 count = 0; 658 for (i=0; i<nrecvs; i++) { 659 values = rvalues + i*nmax; 660 for (j=0; j<lens[i]; j++) { 661 lrows[count++] = values[j] - base; 662 } 663 } 664 ierr = PetscFree(rvalues);CHKERRQ(ierr); 665 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 666 ierr = PetscFree(owner);CHKERRQ(ierr); 667 ierr = PetscFree(nprocs);CHKERRQ(ierr); 668 669 /* actually zap the local rows */ 670 /* 671 Zero the required rows. If the "diagonal block" of the matrix 672 is square and the user wishes to set the diagonal we use separate 673 code so that MatSetValues() is not called for each diagonal allocating 674 new memory, thus calling lots of mallocs and slowing things down. 675 676 */ 677 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 678 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 679 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 680 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 681 } else if (diag != 0.0) { 682 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 683 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 684 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 685 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 686 } 687 for (i = 0; i < slen; i++) { 688 row = lrows[i] + rstart; 689 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 690 } 691 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 692 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 693 } else { 694 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 695 } 696 ierr = PetscFree(lrows);CHKERRQ(ierr); 697 698 /* wait on sends */ 699 if (nsends) { 700 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 701 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 702 ierr = PetscFree(send_status);CHKERRQ(ierr); 703 } 704 ierr = PetscFree(send_waits);CHKERRQ(ierr); 705 ierr = PetscFree(svalues);CHKERRQ(ierr); 706 707 PetscFunctionReturn(0); 708 } 709 710 #undef __FUNCT__ 711 #define __FUNCT__ "MatMult_MPIAIJ" 712 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 713 { 714 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 715 PetscErrorCode ierr; 716 PetscInt nt; 717 718 PetscFunctionBegin; 719 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 720 if (nt != A->cmap->n) { 721 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 722 } 723 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 724 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 725 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 726 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 727 PetscFunctionReturn(0); 728 } 729 730 #undef __FUNCT__ 731 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 732 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 733 { 734 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 735 PetscErrorCode ierr; 736 737 PetscFunctionBegin; 738 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 739 PetscFunctionReturn(0); 740 } 741 742 #undef __FUNCT__ 743 #define __FUNCT__ "MatMultAdd_MPIAIJ" 744 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 745 { 746 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 747 PetscErrorCode ierr; 748 749 PetscFunctionBegin; 750 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 751 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 752 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 753 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 754 PetscFunctionReturn(0); 755 } 756 757 #undef __FUNCT__ 758 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 759 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 760 { 761 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 762 PetscErrorCode ierr; 763 PetscTruth merged; 764 765 PetscFunctionBegin; 766 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 767 /* do nondiagonal part */ 768 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 769 if (!merged) { 770 /* send it on its way */ 771 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 772 /* do local part */ 773 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 774 /* receive remote parts: note this assumes the values are not actually */ 775 /* added in yy until the next line, */ 776 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 777 } else { 778 /* do local part */ 779 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 780 /* send it on its way */ 781 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 782 /* values actually were received in the Begin() but we need to call this nop */ 783 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 784 } 785 PetscFunctionReturn(0); 786 } 787 788 EXTERN_C_BEGIN 789 #undef __FUNCT__ 790 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 791 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 792 { 793 MPI_Comm comm; 794 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 795 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 796 IS Me,Notme; 797 PetscErrorCode ierr; 798 PetscInt M,N,first,last,*notme,i; 799 PetscMPIInt size; 800 801 PetscFunctionBegin; 802 803 /* Easy test: symmetric diagonal block */ 804 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 805 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 806 if (!*f) PetscFunctionReturn(0); 807 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 808 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 809 if (size == 1) PetscFunctionReturn(0); 810 811 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 812 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 813 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 814 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 815 for (i=0; i<first; i++) notme[i] = i; 816 for (i=last; i<M; i++) notme[i-last+first] = i; 817 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 818 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 819 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 820 Aoff = Aoffs[0]; 821 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 822 Boff = Boffs[0]; 823 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 824 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 825 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 826 ierr = ISDestroy(Me);CHKERRQ(ierr); 827 ierr = ISDestroy(Notme);CHKERRQ(ierr); 828 829 PetscFunctionReturn(0); 830 } 831 EXTERN_C_END 832 833 #undef __FUNCT__ 834 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 835 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 836 { 837 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 838 PetscErrorCode ierr; 839 840 PetscFunctionBegin; 841 /* do nondiagonal part */ 842 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 843 /* send it on its way */ 844 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 845 /* do local part */ 846 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 847 /* receive remote parts */ 848 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 849 PetscFunctionReturn(0); 850 } 851 852 /* 853 This only works correctly for square matrices where the subblock A->A is the 854 diagonal block 855 */ 856 #undef __FUNCT__ 857 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 858 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 859 { 860 PetscErrorCode ierr; 861 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 862 863 PetscFunctionBegin; 864 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 865 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 866 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 867 PetscFunctionReturn(0); 868 } 869 870 #undef __FUNCT__ 871 #define __FUNCT__ "MatScale_MPIAIJ" 872 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 873 { 874 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 875 PetscErrorCode ierr; 876 877 PetscFunctionBegin; 878 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 879 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 880 PetscFunctionReturn(0); 881 } 882 883 #undef __FUNCT__ 884 #define __FUNCT__ "MatDestroy_MPIAIJ" 885 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 886 { 887 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 888 PetscErrorCode ierr; 889 890 PetscFunctionBegin; 891 #if defined(PETSC_USE_LOG) 892 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 893 #endif 894 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 895 if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);} 896 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 897 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 898 #if defined (PETSC_USE_CTABLE) 899 if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);} 900 #else 901 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 902 #endif 903 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 904 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 905 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 906 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 907 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 908 ierr = PetscFree(aij);CHKERRQ(ierr); 909 910 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 911 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 912 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 913 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 914 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 915 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 916 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 917 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 918 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 919 PetscFunctionReturn(0); 920 } 921 922 #undef __FUNCT__ 923 #define __FUNCT__ "MatView_MPIAIJ_Binary" 924 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 925 { 926 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 927 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 928 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 929 PetscErrorCode ierr; 930 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 931 int fd; 932 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 933 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 934 PetscScalar *column_values; 935 936 PetscFunctionBegin; 937 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 938 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 939 nz = A->nz + B->nz; 940 if (!rank) { 941 header[0] = MAT_FILE_CLASSID; 942 header[1] = mat->rmap->N; 943 header[2] = mat->cmap->N; 944 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 945 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 946 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 947 /* get largest number of rows any processor has */ 948 rlen = mat->rmap->n; 949 range = mat->rmap->range; 950 for (i=1; i<size; i++) { 951 rlen = PetscMax(rlen,range[i+1] - range[i]); 952 } 953 } else { 954 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 955 rlen = mat->rmap->n; 956 } 957 958 /* load up the local row counts */ 959 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 960 for (i=0; i<mat->rmap->n; i++) { 961 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 962 } 963 964 /* store the row lengths to the file */ 965 if (!rank) { 966 MPI_Status status; 967 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 968 for (i=1; i<size; i++) { 969 rlen = range[i+1] - range[i]; 970 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 971 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 972 } 973 } else { 974 ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 975 } 976 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 977 978 /* load up the local column indices */ 979 nzmax = nz; /* )th processor needs space a largest processor needs */ 980 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 981 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 982 cnt = 0; 983 for (i=0; i<mat->rmap->n; i++) { 984 for (j=B->i[i]; j<B->i[i+1]; j++) { 985 if ( (col = garray[B->j[j]]) > cstart) break; 986 column_indices[cnt++] = col; 987 } 988 for (k=A->i[i]; k<A->i[i+1]; k++) { 989 column_indices[cnt++] = A->j[k] + cstart; 990 } 991 for (; j<B->i[i+1]; j++) { 992 column_indices[cnt++] = garray[B->j[j]]; 993 } 994 } 995 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 996 997 /* store the column indices to the file */ 998 if (!rank) { 999 MPI_Status status; 1000 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1001 for (i=1; i<size; i++) { 1002 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1003 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1004 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1005 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1006 } 1007 } else { 1008 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1009 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1010 } 1011 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1012 1013 /* load up the local column values */ 1014 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1015 cnt = 0; 1016 for (i=0; i<mat->rmap->n; i++) { 1017 for (j=B->i[i]; j<B->i[i+1]; j++) { 1018 if ( garray[B->j[j]] > cstart) break; 1019 column_values[cnt++] = B->a[j]; 1020 } 1021 for (k=A->i[i]; k<A->i[i+1]; k++) { 1022 column_values[cnt++] = A->a[k]; 1023 } 1024 for (; j<B->i[i+1]; j++) { 1025 column_values[cnt++] = B->a[j]; 1026 } 1027 } 1028 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1029 1030 /* store the column values to the file */ 1031 if (!rank) { 1032 MPI_Status status; 1033 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1034 for (i=1; i<size; i++) { 1035 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1036 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1037 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1038 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1039 } 1040 } else { 1041 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1042 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1043 } 1044 ierr = PetscFree(column_values);CHKERRQ(ierr); 1045 PetscFunctionReturn(0); 1046 } 1047 1048 #undef __FUNCT__ 1049 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1050 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1051 { 1052 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1053 PetscErrorCode ierr; 1054 PetscMPIInt rank = aij->rank,size = aij->size; 1055 PetscTruth isdraw,iascii,isbinary; 1056 PetscViewer sviewer; 1057 PetscViewerFormat format; 1058 1059 PetscFunctionBegin; 1060 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1061 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1062 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1063 if (iascii) { 1064 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1065 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1066 MatInfo info; 1067 PetscTruth inodes; 1068 1069 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1070 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1071 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1072 if (!inodes) { 1073 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1074 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1075 } else { 1076 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1077 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1078 } 1079 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1080 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1081 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1082 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1083 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1084 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1085 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1086 PetscFunctionReturn(0); 1087 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1088 PetscInt inodecount,inodelimit,*inodes; 1089 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1090 if (inodes) { 1091 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1092 } else { 1093 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1094 } 1095 PetscFunctionReturn(0); 1096 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1097 PetscFunctionReturn(0); 1098 } 1099 } else if (isbinary) { 1100 if (size == 1) { 1101 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1102 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1103 } else { 1104 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1105 } 1106 PetscFunctionReturn(0); 1107 } else if (isdraw) { 1108 PetscDraw draw; 1109 PetscTruth isnull; 1110 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1111 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1112 } 1113 1114 if (size == 1) { 1115 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1116 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1117 } else { 1118 /* assemble the entire matrix onto first processor. */ 1119 Mat A; 1120 Mat_SeqAIJ *Aloc; 1121 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1122 MatScalar *a; 1123 1124 if (mat->rmap->N > 1024) { 1125 PetscTruth flg = PETSC_FALSE; 1126 1127 ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1128 if (!flg) { 1129 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1130 } 1131 } 1132 1133 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1134 if (!rank) { 1135 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1136 } else { 1137 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1138 } 1139 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1140 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1141 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1142 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1143 1144 /* copy over the A part */ 1145 Aloc = (Mat_SeqAIJ*)aij->A->data; 1146 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1147 row = mat->rmap->rstart; 1148 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1149 for (i=0; i<m; i++) { 1150 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1151 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1152 } 1153 aj = Aloc->j; 1154 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1155 1156 /* copy over the B part */ 1157 Aloc = (Mat_SeqAIJ*)aij->B->data; 1158 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1159 row = mat->rmap->rstart; 1160 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1161 ct = cols; 1162 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1163 for (i=0; i<m; i++) { 1164 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1165 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1166 } 1167 ierr = PetscFree(ct);CHKERRQ(ierr); 1168 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1169 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1170 /* 1171 Everyone has to call to draw the matrix since the graphics waits are 1172 synchronized across all processors that share the PetscDraw object 1173 */ 1174 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1175 if (!rank) { 1176 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1177 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1178 } 1179 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1180 ierr = MatDestroy(A);CHKERRQ(ierr); 1181 } 1182 PetscFunctionReturn(0); 1183 } 1184 1185 #undef __FUNCT__ 1186 #define __FUNCT__ "MatView_MPIAIJ" 1187 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1188 { 1189 PetscErrorCode ierr; 1190 PetscTruth iascii,isdraw,issocket,isbinary; 1191 1192 PetscFunctionBegin; 1193 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1194 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1195 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1196 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1197 if (iascii || isdraw || isbinary || issocket) { 1198 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1199 } else { 1200 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1201 } 1202 PetscFunctionReturn(0); 1203 } 1204 1205 #undef __FUNCT__ 1206 #define __FUNCT__ "MatSOR_MPIAIJ" 1207 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1208 { 1209 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1210 PetscErrorCode ierr; 1211 Vec bb1 = 0; 1212 PetscTruth hasop; 1213 1214 PetscFunctionBegin; 1215 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1216 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1217 } 1218 1219 if (flag == SOR_APPLY_UPPER) { 1220 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1221 PetscFunctionReturn(0); 1222 } 1223 1224 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1225 if (flag & SOR_ZERO_INITIAL_GUESS) { 1226 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1227 its--; 1228 } 1229 1230 while (its--) { 1231 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1232 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1233 1234 /* update rhs: bb1 = bb - B*x */ 1235 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1236 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1237 1238 /* local sweep */ 1239 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1240 } 1241 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1242 if (flag & SOR_ZERO_INITIAL_GUESS) { 1243 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1244 its--; 1245 } 1246 while (its--) { 1247 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1248 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1249 1250 /* update rhs: bb1 = bb - B*x */ 1251 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1252 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1253 1254 /* local sweep */ 1255 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1256 } 1257 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1258 if (flag & SOR_ZERO_INITIAL_GUESS) { 1259 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1260 its--; 1261 } 1262 while (its--) { 1263 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1264 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1265 1266 /* update rhs: bb1 = bb - B*x */ 1267 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1268 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1269 1270 /* local sweep */ 1271 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1272 } 1273 } else if (flag & SOR_EISENSTAT) { 1274 Vec xx1; 1275 1276 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1277 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1278 1279 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1280 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1281 if (!mat->diag) { 1282 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1283 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1284 } 1285 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1286 if (hasop) { 1287 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1288 } else { 1289 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1290 } 1291 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1292 1293 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1294 1295 /* local sweep */ 1296 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1297 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1298 ierr = VecDestroy(xx1);CHKERRQ(ierr); 1299 } else { 1300 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1301 } 1302 1303 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 1304 PetscFunctionReturn(0); 1305 } 1306 1307 #undef __FUNCT__ 1308 #define __FUNCT__ "MatPermute_MPIAIJ" 1309 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1310 { 1311 MPI_Comm comm,pcomm; 1312 PetscInt first,local_size,nrows; 1313 const PetscInt *rows; 1314 PetscMPIInt size; 1315 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1316 PetscErrorCode ierr; 1317 1318 PetscFunctionBegin; 1319 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1320 /* make a collective version of 'rowp' */ 1321 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1322 if (pcomm==comm) { 1323 crowp = rowp; 1324 } else { 1325 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1326 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1327 ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr); 1328 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1329 } 1330 /* collect the global row permutation and invert it */ 1331 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1332 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1333 if (pcomm!=comm) { 1334 ierr = ISDestroy(crowp);CHKERRQ(ierr); 1335 } 1336 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1337 /* get the local target indices */ 1338 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1339 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 1340 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1341 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr); 1342 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1343 ierr = ISDestroy(irowp);CHKERRQ(ierr); 1344 /* the column permutation is so much easier; 1345 make a local version of 'colp' and invert it */ 1346 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 1347 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 1348 if (size==1) { 1349 lcolp = colp; 1350 } else { 1351 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 1352 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 1353 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr); 1354 } 1355 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1356 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1357 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 1358 if (size>1) { 1359 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 1360 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 1361 } 1362 /* now we just get the submatrix */ 1363 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1364 /* clean up */ 1365 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 1366 ierr = ISDestroy(icolp);CHKERRQ(ierr); 1367 PetscFunctionReturn(0); 1368 } 1369 1370 #undef __FUNCT__ 1371 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1372 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1373 { 1374 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1375 Mat A = mat->A,B = mat->B; 1376 PetscErrorCode ierr; 1377 PetscReal isend[5],irecv[5]; 1378 1379 PetscFunctionBegin; 1380 info->block_size = 1.0; 1381 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1382 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1383 isend[3] = info->memory; isend[4] = info->mallocs; 1384 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1385 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1386 isend[3] += info->memory; isend[4] += info->mallocs; 1387 if (flag == MAT_LOCAL) { 1388 info->nz_used = isend[0]; 1389 info->nz_allocated = isend[1]; 1390 info->nz_unneeded = isend[2]; 1391 info->memory = isend[3]; 1392 info->mallocs = isend[4]; 1393 } else if (flag == MAT_GLOBAL_MAX) { 1394 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1395 info->nz_used = irecv[0]; 1396 info->nz_allocated = irecv[1]; 1397 info->nz_unneeded = irecv[2]; 1398 info->memory = irecv[3]; 1399 info->mallocs = irecv[4]; 1400 } else if (flag == MAT_GLOBAL_SUM) { 1401 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1402 info->nz_used = irecv[0]; 1403 info->nz_allocated = irecv[1]; 1404 info->nz_unneeded = irecv[2]; 1405 info->memory = irecv[3]; 1406 info->mallocs = irecv[4]; 1407 } 1408 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1409 info->fill_ratio_needed = 0; 1410 info->factor_mallocs = 0; 1411 1412 PetscFunctionReturn(0); 1413 } 1414 1415 #undef __FUNCT__ 1416 #define __FUNCT__ "MatSetOption_MPIAIJ" 1417 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg) 1418 { 1419 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1420 PetscErrorCode ierr; 1421 1422 PetscFunctionBegin; 1423 switch (op) { 1424 case MAT_NEW_NONZERO_LOCATIONS: 1425 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1426 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1427 case MAT_KEEP_NONZERO_PATTERN: 1428 case MAT_NEW_NONZERO_LOCATION_ERR: 1429 case MAT_USE_INODES: 1430 case MAT_IGNORE_ZERO_ENTRIES: 1431 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1432 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1433 break; 1434 case MAT_ROW_ORIENTED: 1435 a->roworiented = flg; 1436 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1437 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1438 break; 1439 case MAT_NEW_DIAGONALS: 1440 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1441 break; 1442 case MAT_IGNORE_OFF_PROC_ENTRIES: 1443 a->donotstash = PETSC_TRUE; 1444 break; 1445 case MAT_SYMMETRIC: 1446 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1447 break; 1448 case MAT_STRUCTURALLY_SYMMETRIC: 1449 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1450 break; 1451 case MAT_HERMITIAN: 1452 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1453 break; 1454 case MAT_SYMMETRY_ETERNAL: 1455 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1456 break; 1457 default: 1458 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1459 } 1460 PetscFunctionReturn(0); 1461 } 1462 1463 #undef __FUNCT__ 1464 #define __FUNCT__ "MatGetRow_MPIAIJ" 1465 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1466 { 1467 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1468 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1469 PetscErrorCode ierr; 1470 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1471 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1472 PetscInt *cmap,*idx_p; 1473 1474 PetscFunctionBegin; 1475 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1476 mat->getrowactive = PETSC_TRUE; 1477 1478 if (!mat->rowvalues && (idx || v)) { 1479 /* 1480 allocate enough space to hold information from the longest row. 1481 */ 1482 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1483 PetscInt max = 1,tmp; 1484 for (i=0; i<matin->rmap->n; i++) { 1485 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1486 if (max < tmp) { max = tmp; } 1487 } 1488 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1489 } 1490 1491 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1492 lrow = row - rstart; 1493 1494 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1495 if (!v) {pvA = 0; pvB = 0;} 1496 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1497 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1498 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1499 nztot = nzA + nzB; 1500 1501 cmap = mat->garray; 1502 if (v || idx) { 1503 if (nztot) { 1504 /* Sort by increasing column numbers, assuming A and B already sorted */ 1505 PetscInt imark = -1; 1506 if (v) { 1507 *v = v_p = mat->rowvalues; 1508 for (i=0; i<nzB; i++) { 1509 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1510 else break; 1511 } 1512 imark = i; 1513 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1514 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1515 } 1516 if (idx) { 1517 *idx = idx_p = mat->rowindices; 1518 if (imark > -1) { 1519 for (i=0; i<imark; i++) { 1520 idx_p[i] = cmap[cworkB[i]]; 1521 } 1522 } else { 1523 for (i=0; i<nzB; i++) { 1524 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1525 else break; 1526 } 1527 imark = i; 1528 } 1529 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1530 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1531 } 1532 } else { 1533 if (idx) *idx = 0; 1534 if (v) *v = 0; 1535 } 1536 } 1537 *nz = nztot; 1538 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1539 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1540 PetscFunctionReturn(0); 1541 } 1542 1543 #undef __FUNCT__ 1544 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1545 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1546 { 1547 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1548 1549 PetscFunctionBegin; 1550 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1551 aij->getrowactive = PETSC_FALSE; 1552 PetscFunctionReturn(0); 1553 } 1554 1555 #undef __FUNCT__ 1556 #define __FUNCT__ "MatNorm_MPIAIJ" 1557 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1558 { 1559 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1560 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1561 PetscErrorCode ierr; 1562 PetscInt i,j,cstart = mat->cmap->rstart; 1563 PetscReal sum = 0.0; 1564 MatScalar *v; 1565 1566 PetscFunctionBegin; 1567 if (aij->size == 1) { 1568 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1569 } else { 1570 if (type == NORM_FROBENIUS) { 1571 v = amat->a; 1572 for (i=0; i<amat->nz; i++) { 1573 #if defined(PETSC_USE_COMPLEX) 1574 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1575 #else 1576 sum += (*v)*(*v); v++; 1577 #endif 1578 } 1579 v = bmat->a; 1580 for (i=0; i<bmat->nz; i++) { 1581 #if defined(PETSC_USE_COMPLEX) 1582 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1583 #else 1584 sum += (*v)*(*v); v++; 1585 #endif 1586 } 1587 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1588 *norm = sqrt(*norm); 1589 } else if (type == NORM_1) { /* max column norm */ 1590 PetscReal *tmp,*tmp2; 1591 PetscInt *jj,*garray = aij->garray; 1592 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1593 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1594 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1595 *norm = 0.0; 1596 v = amat->a; jj = amat->j; 1597 for (j=0; j<amat->nz; j++) { 1598 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1599 } 1600 v = bmat->a; jj = bmat->j; 1601 for (j=0; j<bmat->nz; j++) { 1602 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1603 } 1604 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1605 for (j=0; j<mat->cmap->N; j++) { 1606 if (tmp2[j] > *norm) *norm = tmp2[j]; 1607 } 1608 ierr = PetscFree(tmp);CHKERRQ(ierr); 1609 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1610 } else if (type == NORM_INFINITY) { /* max row norm */ 1611 PetscReal ntemp = 0.0; 1612 for (j=0; j<aij->A->rmap->n; j++) { 1613 v = amat->a + amat->i[j]; 1614 sum = 0.0; 1615 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1616 sum += PetscAbsScalar(*v); v++; 1617 } 1618 v = bmat->a + bmat->i[j]; 1619 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1620 sum += PetscAbsScalar(*v); v++; 1621 } 1622 if (sum > ntemp) ntemp = sum; 1623 } 1624 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1625 } else { 1626 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 1627 } 1628 } 1629 PetscFunctionReturn(0); 1630 } 1631 1632 #undef __FUNCT__ 1633 #define __FUNCT__ "MatTranspose_MPIAIJ" 1634 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1635 { 1636 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1637 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1638 PetscErrorCode ierr; 1639 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1640 PetscInt cstart=A->cmap->rstart,ncol; 1641 Mat B; 1642 MatScalar *array; 1643 1644 PetscFunctionBegin; 1645 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1646 1647 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1648 ai = Aloc->i; aj = Aloc->j; 1649 bi = Bloc->i; bj = Bloc->j; 1650 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1651 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1652 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1653 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1654 for (i=0; i<ai[ma]; i++){ 1655 d_nnz[aj[i]] ++; 1656 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1657 } 1658 1659 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1660 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1661 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1662 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1663 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1664 } else { 1665 B = *matout; 1666 } 1667 1668 /* copy over the A part */ 1669 array = Aloc->a; 1670 row = A->rmap->rstart; 1671 for (i=0; i<ma; i++) { 1672 ncol = ai[i+1]-ai[i]; 1673 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1674 row++; array += ncol; aj += ncol; 1675 } 1676 aj = Aloc->j; 1677 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1678 1679 /* copy over the B part */ 1680 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1681 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1682 array = Bloc->a; 1683 row = A->rmap->rstart; 1684 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1685 cols_tmp = cols; 1686 for (i=0; i<mb; i++) { 1687 ncol = bi[i+1]-bi[i]; 1688 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1689 row++; array += ncol; cols_tmp += ncol; 1690 } 1691 ierr = PetscFree(cols);CHKERRQ(ierr); 1692 1693 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1694 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1695 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1696 *matout = B; 1697 } else { 1698 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1699 } 1700 PetscFunctionReturn(0); 1701 } 1702 1703 #undef __FUNCT__ 1704 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1705 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1706 { 1707 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1708 Mat a = aij->A,b = aij->B; 1709 PetscErrorCode ierr; 1710 PetscInt s1,s2,s3; 1711 1712 PetscFunctionBegin; 1713 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1714 if (rr) { 1715 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1716 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1717 /* Overlap communication with computation. */ 1718 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1719 } 1720 if (ll) { 1721 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1722 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1723 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1724 } 1725 /* scale the diagonal block */ 1726 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1727 1728 if (rr) { 1729 /* Do a scatter end and then right scale the off-diagonal block */ 1730 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1731 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1732 } 1733 1734 PetscFunctionReturn(0); 1735 } 1736 1737 #undef __FUNCT__ 1738 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1739 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1740 { 1741 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1742 PetscErrorCode ierr; 1743 1744 PetscFunctionBegin; 1745 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1746 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1747 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 1748 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 1749 PetscFunctionReturn(0); 1750 } 1751 #undef __FUNCT__ 1752 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1753 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1754 { 1755 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1756 PetscErrorCode ierr; 1757 1758 PetscFunctionBegin; 1759 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1760 PetscFunctionReturn(0); 1761 } 1762 1763 #undef __FUNCT__ 1764 #define __FUNCT__ "MatEqual_MPIAIJ" 1765 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1766 { 1767 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1768 Mat a,b,c,d; 1769 PetscTruth flg; 1770 PetscErrorCode ierr; 1771 1772 PetscFunctionBegin; 1773 a = matA->A; b = matA->B; 1774 c = matB->A; d = matB->B; 1775 1776 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1777 if (flg) { 1778 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1779 } 1780 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1781 PetscFunctionReturn(0); 1782 } 1783 1784 #undef __FUNCT__ 1785 #define __FUNCT__ "MatCopy_MPIAIJ" 1786 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1787 { 1788 PetscErrorCode ierr; 1789 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1790 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1791 1792 PetscFunctionBegin; 1793 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1794 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1795 /* because of the column compression in the off-processor part of the matrix a->B, 1796 the number of columns in a->B and b->B may be different, hence we cannot call 1797 the MatCopy() directly on the two parts. If need be, we can provide a more 1798 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1799 then copying the submatrices */ 1800 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1801 } else { 1802 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1803 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1804 } 1805 PetscFunctionReturn(0); 1806 } 1807 1808 #undef __FUNCT__ 1809 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1810 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1811 { 1812 PetscErrorCode ierr; 1813 1814 PetscFunctionBegin; 1815 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1816 PetscFunctionReturn(0); 1817 } 1818 1819 #undef __FUNCT__ 1820 #define __FUNCT__ "MatAXPY_MPIAIJ" 1821 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1822 { 1823 PetscErrorCode ierr; 1824 PetscInt i; 1825 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1826 PetscBLASInt bnz,one=1; 1827 Mat_SeqAIJ *x,*y; 1828 1829 PetscFunctionBegin; 1830 if (str == SAME_NONZERO_PATTERN) { 1831 PetscScalar alpha = a; 1832 x = (Mat_SeqAIJ *)xx->A->data; 1833 y = (Mat_SeqAIJ *)yy->A->data; 1834 bnz = PetscBLASIntCast(x->nz); 1835 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1836 x = (Mat_SeqAIJ *)xx->B->data; 1837 y = (Mat_SeqAIJ *)yy->B->data; 1838 bnz = PetscBLASIntCast(x->nz); 1839 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1840 } else if (str == SUBSET_NONZERO_PATTERN) { 1841 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1842 1843 x = (Mat_SeqAIJ *)xx->B->data; 1844 y = (Mat_SeqAIJ *)yy->B->data; 1845 if (y->xtoy && y->XtoY != xx->B) { 1846 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1847 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1848 } 1849 if (!y->xtoy) { /* get xtoy */ 1850 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1851 y->XtoY = xx->B; 1852 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1853 } 1854 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1855 } else { 1856 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1857 } 1858 PetscFunctionReturn(0); 1859 } 1860 1861 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1862 1863 #undef __FUNCT__ 1864 #define __FUNCT__ "MatConjugate_MPIAIJ" 1865 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1866 { 1867 #if defined(PETSC_USE_COMPLEX) 1868 PetscErrorCode ierr; 1869 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1870 1871 PetscFunctionBegin; 1872 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1873 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1874 #else 1875 PetscFunctionBegin; 1876 #endif 1877 PetscFunctionReturn(0); 1878 } 1879 1880 #undef __FUNCT__ 1881 #define __FUNCT__ "MatRealPart_MPIAIJ" 1882 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1883 { 1884 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1885 PetscErrorCode ierr; 1886 1887 PetscFunctionBegin; 1888 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1889 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1890 PetscFunctionReturn(0); 1891 } 1892 1893 #undef __FUNCT__ 1894 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1895 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1896 { 1897 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1898 PetscErrorCode ierr; 1899 1900 PetscFunctionBegin; 1901 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1902 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1903 PetscFunctionReturn(0); 1904 } 1905 1906 #ifdef PETSC_HAVE_PBGL 1907 1908 #include <boost/parallel/mpi/bsp_process_group.hpp> 1909 #include <boost/graph/distributed/ilu_default_graph.hpp> 1910 #include <boost/graph/distributed/ilu_0_block.hpp> 1911 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1912 #include <boost/graph/distributed/petsc/interface.hpp> 1913 #include <boost/multi_array.hpp> 1914 #include <boost/parallel/distributed_property_map->hpp> 1915 1916 #undef __FUNCT__ 1917 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1918 /* 1919 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1920 */ 1921 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1922 { 1923 namespace petsc = boost::distributed::petsc; 1924 1925 namespace graph_dist = boost::graph::distributed; 1926 using boost::graph::distributed::ilu_default::process_group_type; 1927 using boost::graph::ilu_permuted; 1928 1929 PetscTruth row_identity, col_identity; 1930 PetscContainer c; 1931 PetscInt m, n, M, N; 1932 PetscErrorCode ierr; 1933 1934 PetscFunctionBegin; 1935 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1936 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1937 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1938 if (!row_identity || !col_identity) { 1939 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1940 } 1941 1942 process_group_type pg; 1943 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1944 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1945 lgraph_type& level_graph = *lgraph_p; 1946 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1947 1948 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1949 ilu_permuted(level_graph); 1950 1951 /* put together the new matrix */ 1952 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 1953 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1954 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1955 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 1956 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 1957 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1958 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1959 1960 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 1961 ierr = PetscContainerSetPointer(c, lgraph_p); 1962 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 1963 PetscFunctionReturn(0); 1964 } 1965 1966 #undef __FUNCT__ 1967 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1968 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 1969 { 1970 PetscFunctionBegin; 1971 PetscFunctionReturn(0); 1972 } 1973 1974 #undef __FUNCT__ 1975 #define __FUNCT__ "MatSolve_MPIAIJ" 1976 /* 1977 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1978 */ 1979 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1980 { 1981 namespace graph_dist = boost::graph::distributed; 1982 1983 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1984 lgraph_type* lgraph_p; 1985 PetscContainer c; 1986 PetscErrorCode ierr; 1987 1988 PetscFunctionBegin; 1989 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1990 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1991 ierr = VecCopy(b, x);CHKERRQ(ierr); 1992 1993 PetscScalar* array_x; 1994 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1995 PetscInt sx; 1996 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 1997 1998 PetscScalar* array_b; 1999 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2000 PetscInt sb; 2001 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2002 2003 lgraph_type& level_graph = *lgraph_p; 2004 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2005 2006 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2007 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2008 ref_x(array_x, boost::extents[num_vertices(graph)]); 2009 2010 typedef boost::iterator_property_map<array_ref_type::iterator, 2011 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2012 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2013 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2014 2015 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2016 2017 PetscFunctionReturn(0); 2018 } 2019 #endif 2020 2021 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2022 PetscInt nzlocal,nsends,nrecvs; 2023 PetscMPIInt *send_rank,*recv_rank; 2024 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2025 PetscScalar *sbuf_a,**rbuf_a; 2026 PetscErrorCode (*MatDestroy)(Mat); 2027 } Mat_Redundant; 2028 2029 #undef __FUNCT__ 2030 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2031 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2032 { 2033 PetscErrorCode ierr; 2034 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2035 PetscInt i; 2036 2037 PetscFunctionBegin; 2038 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2039 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2040 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2041 for (i=0; i<redund->nrecvs; i++){ 2042 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2043 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2044 } 2045 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2046 ierr = PetscFree(redund);CHKERRQ(ierr); 2047 PetscFunctionReturn(0); 2048 } 2049 2050 #undef __FUNCT__ 2051 #define __FUNCT__ "MatDestroy_MatRedundant" 2052 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2053 { 2054 PetscErrorCode ierr; 2055 PetscContainer container; 2056 Mat_Redundant *redund=PETSC_NULL; 2057 2058 PetscFunctionBegin; 2059 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2060 if (container) { 2061 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2062 } else { 2063 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2064 } 2065 A->ops->destroy = redund->MatDestroy; 2066 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2067 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2068 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 2069 PetscFunctionReturn(0); 2070 } 2071 2072 #undef __FUNCT__ 2073 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2074 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2075 { 2076 PetscMPIInt rank,size; 2077 MPI_Comm comm=((PetscObject)mat)->comm; 2078 PetscErrorCode ierr; 2079 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2080 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2081 PetscInt *rowrange=mat->rmap->range; 2082 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2083 Mat A=aij->A,B=aij->B,C=*matredundant; 2084 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2085 PetscScalar *sbuf_a; 2086 PetscInt nzlocal=a->nz+b->nz; 2087 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2088 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2089 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2090 MatScalar *aworkA,*aworkB; 2091 PetscScalar *vals; 2092 PetscMPIInt tag1,tag2,tag3,imdex; 2093 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2094 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2095 MPI_Status recv_status,*send_status; 2096 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2097 PetscInt **rbuf_j=PETSC_NULL; 2098 PetscScalar **rbuf_a=PETSC_NULL; 2099 Mat_Redundant *redund=PETSC_NULL; 2100 PetscContainer container; 2101 2102 PetscFunctionBegin; 2103 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2104 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2105 2106 if (reuse == MAT_REUSE_MATRIX) { 2107 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2108 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2109 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2110 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2111 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2112 if (container) { 2113 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2114 } else { 2115 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2116 } 2117 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2118 2119 nsends = redund->nsends; 2120 nrecvs = redund->nrecvs; 2121 send_rank = redund->send_rank; 2122 recv_rank = redund->recv_rank; 2123 sbuf_nz = redund->sbuf_nz; 2124 rbuf_nz = redund->rbuf_nz; 2125 sbuf_j = redund->sbuf_j; 2126 sbuf_a = redund->sbuf_a; 2127 rbuf_j = redund->rbuf_j; 2128 rbuf_a = redund->rbuf_a; 2129 } 2130 2131 if (reuse == MAT_INITIAL_MATRIX){ 2132 PetscMPIInt subrank,subsize; 2133 PetscInt nleftover,np_subcomm; 2134 /* get the destination processors' id send_rank, nsends and nrecvs */ 2135 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2136 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2137 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2138 np_subcomm = size/nsubcomm; 2139 nleftover = size - nsubcomm*np_subcomm; 2140 nsends = 0; nrecvs = 0; 2141 for (i=0; i<size; i++){ /* i=rank*/ 2142 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2143 send_rank[nsends] = i; nsends++; 2144 recv_rank[nrecvs++] = i; 2145 } 2146 } 2147 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2148 i = size-nleftover-1; 2149 j = 0; 2150 while (j < nsubcomm - nleftover){ 2151 send_rank[nsends++] = i; 2152 i--; j++; 2153 } 2154 } 2155 2156 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2157 for (i=0; i<nleftover; i++){ 2158 recv_rank[nrecvs++] = size-nleftover+i; 2159 } 2160 } 2161 2162 /* allocate sbuf_j, sbuf_a */ 2163 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2164 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2165 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2166 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2167 2168 /* copy mat's local entries into the buffers */ 2169 if (reuse == MAT_INITIAL_MATRIX){ 2170 rownz_max = 0; 2171 rptr = sbuf_j; 2172 cols = sbuf_j + rend-rstart + 1; 2173 vals = sbuf_a; 2174 rptr[0] = 0; 2175 for (i=0; i<rend-rstart; i++){ 2176 row = i + rstart; 2177 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2178 ncols = nzA + nzB; 2179 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2180 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2181 /* load the column indices for this row into cols */ 2182 lwrite = 0; 2183 for (l=0; l<nzB; l++) { 2184 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2185 vals[lwrite] = aworkB[l]; 2186 cols[lwrite++] = ctmp; 2187 } 2188 } 2189 for (l=0; l<nzA; l++){ 2190 vals[lwrite] = aworkA[l]; 2191 cols[lwrite++] = cstart + cworkA[l]; 2192 } 2193 for (l=0; l<nzB; l++) { 2194 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2195 vals[lwrite] = aworkB[l]; 2196 cols[lwrite++] = ctmp; 2197 } 2198 } 2199 vals += ncols; 2200 cols += ncols; 2201 rptr[i+1] = rptr[i] + ncols; 2202 if (rownz_max < ncols) rownz_max = ncols; 2203 } 2204 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2205 } else { /* only copy matrix values into sbuf_a */ 2206 rptr = sbuf_j; 2207 vals = sbuf_a; 2208 rptr[0] = 0; 2209 for (i=0; i<rend-rstart; i++){ 2210 row = i + rstart; 2211 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2212 ncols = nzA + nzB; 2213 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2214 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2215 lwrite = 0; 2216 for (l=0; l<nzB; l++) { 2217 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2218 } 2219 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2220 for (l=0; l<nzB; l++) { 2221 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2222 } 2223 vals += ncols; 2224 rptr[i+1] = rptr[i] + ncols; 2225 } 2226 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2227 2228 /* send nzlocal to others, and recv other's nzlocal */ 2229 /*--------------------------------------------------*/ 2230 if (reuse == MAT_INITIAL_MATRIX){ 2231 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2232 s_waits2 = s_waits3 + nsends; 2233 s_waits1 = s_waits2 + nsends; 2234 r_waits1 = s_waits1 + nsends; 2235 r_waits2 = r_waits1 + nrecvs; 2236 r_waits3 = r_waits2 + nrecvs; 2237 } else { 2238 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2239 r_waits3 = s_waits3 + nsends; 2240 } 2241 2242 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2243 if (reuse == MAT_INITIAL_MATRIX){ 2244 /* get new tags to keep the communication clean */ 2245 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2246 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2247 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2248 2249 /* post receives of other's nzlocal */ 2250 for (i=0; i<nrecvs; i++){ 2251 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2252 } 2253 /* send nzlocal to others */ 2254 for (i=0; i<nsends; i++){ 2255 sbuf_nz[i] = nzlocal; 2256 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2257 } 2258 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2259 count = nrecvs; 2260 while (count) { 2261 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2262 recv_rank[imdex] = recv_status.MPI_SOURCE; 2263 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2264 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2265 2266 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2267 rbuf_nz[imdex] += i + 2; 2268 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2269 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2270 count--; 2271 } 2272 /* wait on sends of nzlocal */ 2273 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2274 /* send mat->i,j to others, and recv from other's */ 2275 /*------------------------------------------------*/ 2276 for (i=0; i<nsends; i++){ 2277 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2278 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2279 } 2280 /* wait on receives of mat->i,j */ 2281 /*------------------------------*/ 2282 count = nrecvs; 2283 while (count) { 2284 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2285 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2286 count--; 2287 } 2288 /* wait on sends of mat->i,j */ 2289 /*---------------------------*/ 2290 if (nsends) { 2291 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2292 } 2293 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2294 2295 /* post receives, send and receive mat->a */ 2296 /*----------------------------------------*/ 2297 for (imdex=0; imdex<nrecvs; imdex++) { 2298 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2299 } 2300 for (i=0; i<nsends; i++){ 2301 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2302 } 2303 count = nrecvs; 2304 while (count) { 2305 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2306 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2307 count--; 2308 } 2309 if (nsends) { 2310 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2311 } 2312 2313 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2314 2315 /* create redundant matrix */ 2316 /*-------------------------*/ 2317 if (reuse == MAT_INITIAL_MATRIX){ 2318 /* compute rownz_max for preallocation */ 2319 for (imdex=0; imdex<nrecvs; imdex++){ 2320 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2321 rptr = rbuf_j[imdex]; 2322 for (i=0; i<j; i++){ 2323 ncols = rptr[i+1] - rptr[i]; 2324 if (rownz_max < ncols) rownz_max = ncols; 2325 } 2326 } 2327 2328 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2329 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2330 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2331 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2332 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2333 } else { 2334 C = *matredundant; 2335 } 2336 2337 /* insert local matrix entries */ 2338 rptr = sbuf_j; 2339 cols = sbuf_j + rend-rstart + 1; 2340 vals = sbuf_a; 2341 for (i=0; i<rend-rstart; i++){ 2342 row = i + rstart; 2343 ncols = rptr[i+1] - rptr[i]; 2344 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2345 vals += ncols; 2346 cols += ncols; 2347 } 2348 /* insert received matrix entries */ 2349 for (imdex=0; imdex<nrecvs; imdex++){ 2350 rstart = rowrange[recv_rank[imdex]]; 2351 rend = rowrange[recv_rank[imdex]+1]; 2352 rptr = rbuf_j[imdex]; 2353 cols = rbuf_j[imdex] + rend-rstart + 1; 2354 vals = rbuf_a[imdex]; 2355 for (i=0; i<rend-rstart; i++){ 2356 row = i + rstart; 2357 ncols = rptr[i+1] - rptr[i]; 2358 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2359 vals += ncols; 2360 cols += ncols; 2361 } 2362 } 2363 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2364 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2365 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2366 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2367 if (reuse == MAT_INITIAL_MATRIX){ 2368 PetscContainer container; 2369 *matredundant = C; 2370 /* create a supporting struct and attach it to C for reuse */ 2371 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2372 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2373 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2374 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2375 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2376 2377 redund->nzlocal = nzlocal; 2378 redund->nsends = nsends; 2379 redund->nrecvs = nrecvs; 2380 redund->send_rank = send_rank; 2381 redund->recv_rank = recv_rank; 2382 redund->sbuf_nz = sbuf_nz; 2383 redund->rbuf_nz = rbuf_nz; 2384 redund->sbuf_j = sbuf_j; 2385 redund->sbuf_a = sbuf_a; 2386 redund->rbuf_j = rbuf_j; 2387 redund->rbuf_a = rbuf_a; 2388 2389 redund->MatDestroy = C->ops->destroy; 2390 C->ops->destroy = MatDestroy_MatRedundant; 2391 } 2392 PetscFunctionReturn(0); 2393 } 2394 2395 #undef __FUNCT__ 2396 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2397 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2398 { 2399 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2400 PetscErrorCode ierr; 2401 PetscInt i,*idxb = 0; 2402 PetscScalar *va,*vb; 2403 Vec vtmp; 2404 2405 PetscFunctionBegin; 2406 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2407 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2408 if (idx) { 2409 for (i=0; i<A->rmap->n; i++) { 2410 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2411 } 2412 } 2413 2414 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2415 if (idx) { 2416 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2417 } 2418 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2419 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2420 2421 for (i=0; i<A->rmap->n; i++){ 2422 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2423 va[i] = vb[i]; 2424 if (idx) idx[i] = a->garray[idxb[i]]; 2425 } 2426 } 2427 2428 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2429 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2430 if (idxb) { 2431 ierr = PetscFree(idxb);CHKERRQ(ierr); 2432 } 2433 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2434 PetscFunctionReturn(0); 2435 } 2436 2437 #undef __FUNCT__ 2438 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2439 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2440 { 2441 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2442 PetscErrorCode ierr; 2443 PetscInt i,*idxb = 0; 2444 PetscScalar *va,*vb; 2445 Vec vtmp; 2446 2447 PetscFunctionBegin; 2448 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2449 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2450 if (idx) { 2451 for (i=0; i<A->cmap->n; i++) { 2452 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2453 } 2454 } 2455 2456 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2457 if (idx) { 2458 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2459 } 2460 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2461 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2462 2463 for (i=0; i<A->rmap->n; i++){ 2464 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2465 va[i] = vb[i]; 2466 if (idx) idx[i] = a->garray[idxb[i]]; 2467 } 2468 } 2469 2470 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2471 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2472 if (idxb) { 2473 ierr = PetscFree(idxb);CHKERRQ(ierr); 2474 } 2475 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2476 PetscFunctionReturn(0); 2477 } 2478 2479 #undef __FUNCT__ 2480 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2481 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2482 { 2483 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2484 PetscInt n = A->rmap->n; 2485 PetscInt cstart = A->cmap->rstart; 2486 PetscInt *cmap = mat->garray; 2487 PetscInt *diagIdx, *offdiagIdx; 2488 Vec diagV, offdiagV; 2489 PetscScalar *a, *diagA, *offdiagA; 2490 PetscInt r; 2491 PetscErrorCode ierr; 2492 2493 PetscFunctionBegin; 2494 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2495 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2496 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2497 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2498 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2499 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2500 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2501 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2502 for(r = 0; r < n; ++r) { 2503 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2504 a[r] = diagA[r]; 2505 idx[r] = cstart + diagIdx[r]; 2506 } else { 2507 a[r] = offdiagA[r]; 2508 idx[r] = cmap[offdiagIdx[r]]; 2509 } 2510 } 2511 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2512 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2513 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2514 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2515 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2516 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2517 PetscFunctionReturn(0); 2518 } 2519 2520 #undef __FUNCT__ 2521 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2522 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2523 { 2524 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2525 PetscInt n = A->rmap->n; 2526 PetscInt cstart = A->cmap->rstart; 2527 PetscInt *cmap = mat->garray; 2528 PetscInt *diagIdx, *offdiagIdx; 2529 Vec diagV, offdiagV; 2530 PetscScalar *a, *diagA, *offdiagA; 2531 PetscInt r; 2532 PetscErrorCode ierr; 2533 2534 PetscFunctionBegin; 2535 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2536 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2537 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2538 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2539 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2540 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2541 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2542 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2543 for(r = 0; r < n; ++r) { 2544 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2545 a[r] = diagA[r]; 2546 idx[r] = cstart + diagIdx[r]; 2547 } else { 2548 a[r] = offdiagA[r]; 2549 idx[r] = cmap[offdiagIdx[r]]; 2550 } 2551 } 2552 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2553 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2554 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2555 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2556 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2557 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2558 PetscFunctionReturn(0); 2559 } 2560 2561 #undef __FUNCT__ 2562 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2563 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat) 2564 { 2565 PetscErrorCode ierr; 2566 Mat *dummy; 2567 2568 PetscFunctionBegin; 2569 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2570 *newmat = *dummy; 2571 ierr = PetscFree(dummy);CHKERRQ(ierr); 2572 PetscFunctionReturn(0); 2573 } 2574 2575 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2576 /* -------------------------------------------------------------------*/ 2577 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2578 MatGetRow_MPIAIJ, 2579 MatRestoreRow_MPIAIJ, 2580 MatMult_MPIAIJ, 2581 /* 4*/ MatMultAdd_MPIAIJ, 2582 MatMultTranspose_MPIAIJ, 2583 MatMultTransposeAdd_MPIAIJ, 2584 #ifdef PETSC_HAVE_PBGL 2585 MatSolve_MPIAIJ, 2586 #else 2587 0, 2588 #endif 2589 0, 2590 0, 2591 /*10*/ 0, 2592 0, 2593 0, 2594 MatSOR_MPIAIJ, 2595 MatTranspose_MPIAIJ, 2596 /*15*/ MatGetInfo_MPIAIJ, 2597 MatEqual_MPIAIJ, 2598 MatGetDiagonal_MPIAIJ, 2599 MatDiagonalScale_MPIAIJ, 2600 MatNorm_MPIAIJ, 2601 /*20*/ MatAssemblyBegin_MPIAIJ, 2602 MatAssemblyEnd_MPIAIJ, 2603 MatSetOption_MPIAIJ, 2604 MatZeroEntries_MPIAIJ, 2605 /*24*/ MatZeroRows_MPIAIJ, 2606 0, 2607 #ifdef PETSC_HAVE_PBGL 2608 0, 2609 #else 2610 0, 2611 #endif 2612 0, 2613 0, 2614 /*29*/ MatSetUpPreallocation_MPIAIJ, 2615 #ifdef PETSC_HAVE_PBGL 2616 0, 2617 #else 2618 0, 2619 #endif 2620 0, 2621 0, 2622 0, 2623 /*34*/ MatDuplicate_MPIAIJ, 2624 0, 2625 0, 2626 0, 2627 0, 2628 /*39*/ MatAXPY_MPIAIJ, 2629 MatGetSubMatrices_MPIAIJ, 2630 MatIncreaseOverlap_MPIAIJ, 2631 MatGetValues_MPIAIJ, 2632 MatCopy_MPIAIJ, 2633 /*44*/ MatGetRowMax_MPIAIJ, 2634 MatScale_MPIAIJ, 2635 0, 2636 0, 2637 0, 2638 /*49*/ MatSetBlockSize_MPIAIJ, 2639 0, 2640 0, 2641 0, 2642 0, 2643 /*54*/ MatFDColoringCreate_MPIAIJ, 2644 0, 2645 MatSetUnfactored_MPIAIJ, 2646 MatPermute_MPIAIJ, 2647 0, 2648 /*59*/ MatGetSubMatrix_MPIAIJ, 2649 MatDestroy_MPIAIJ, 2650 MatView_MPIAIJ, 2651 0, 2652 0, 2653 /*64*/ 0, 2654 0, 2655 0, 2656 0, 2657 0, 2658 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2659 MatGetRowMinAbs_MPIAIJ, 2660 0, 2661 MatSetColoring_MPIAIJ, 2662 #if defined(PETSC_HAVE_ADIC) 2663 MatSetValuesAdic_MPIAIJ, 2664 #else 2665 0, 2666 #endif 2667 MatSetValuesAdifor_MPIAIJ, 2668 /*75*/ MatFDColoringApply_AIJ, 2669 0, 2670 0, 2671 0, 2672 0, 2673 /*80*/ 0, 2674 0, 2675 0, 2676 /*83*/ MatLoad_MPIAIJ, 2677 0, 2678 0, 2679 0, 2680 0, 2681 0, 2682 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2683 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2684 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2685 MatPtAP_Basic, 2686 MatPtAPSymbolic_MPIAIJ, 2687 /*94*/ MatPtAPNumeric_MPIAIJ, 2688 0, 2689 0, 2690 0, 2691 0, 2692 /*99*/ 0, 2693 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2694 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2695 MatConjugate_MPIAIJ, 2696 0, 2697 /*104*/MatSetValuesRow_MPIAIJ, 2698 MatRealPart_MPIAIJ, 2699 MatImaginaryPart_MPIAIJ, 2700 0, 2701 0, 2702 /*109*/0, 2703 MatGetRedundantMatrix_MPIAIJ, 2704 MatGetRowMin_MPIAIJ, 2705 0, 2706 0, 2707 /*114*/MatGetSeqNonzerostructure_MPIAIJ, 2708 0, 2709 0, 2710 0, 2711 0, 2712 0 2713 }; 2714 2715 /* ----------------------------------------------------------------------------------------*/ 2716 2717 EXTERN_C_BEGIN 2718 #undef __FUNCT__ 2719 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2720 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2721 { 2722 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2723 PetscErrorCode ierr; 2724 2725 PetscFunctionBegin; 2726 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2727 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2728 PetscFunctionReturn(0); 2729 } 2730 EXTERN_C_END 2731 2732 EXTERN_C_BEGIN 2733 #undef __FUNCT__ 2734 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2735 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2736 { 2737 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2738 PetscErrorCode ierr; 2739 2740 PetscFunctionBegin; 2741 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2742 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2743 PetscFunctionReturn(0); 2744 } 2745 EXTERN_C_END 2746 2747 #include "petscpc.h" 2748 EXTERN_C_BEGIN 2749 #undef __FUNCT__ 2750 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2751 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2752 { 2753 Mat_MPIAIJ *b; 2754 PetscErrorCode ierr; 2755 PetscInt i; 2756 2757 PetscFunctionBegin; 2758 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2759 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2760 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2761 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2762 2763 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 2764 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 2765 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2766 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2767 if (d_nnz) { 2768 for (i=0; i<B->rmap->n; i++) { 2769 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2770 } 2771 } 2772 if (o_nnz) { 2773 for (i=0; i<B->rmap->n; i++) { 2774 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2775 } 2776 } 2777 b = (Mat_MPIAIJ*)B->data; 2778 2779 if (!B->preallocated) { 2780 /* Explicitly create 2 MATSEQAIJ matrices. */ 2781 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2782 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2783 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2784 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2785 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2786 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2787 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2788 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2789 } 2790 2791 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2792 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2793 B->preallocated = PETSC_TRUE; 2794 PetscFunctionReturn(0); 2795 } 2796 EXTERN_C_END 2797 2798 #undef __FUNCT__ 2799 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2800 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2801 { 2802 Mat mat; 2803 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2804 PetscErrorCode ierr; 2805 2806 PetscFunctionBegin; 2807 *newmat = 0; 2808 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2809 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2810 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2811 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2812 a = (Mat_MPIAIJ*)mat->data; 2813 2814 mat->factortype = matin->factortype; 2815 mat->rmap->bs = matin->rmap->bs; 2816 mat->assembled = PETSC_TRUE; 2817 mat->insertmode = NOT_SET_VALUES; 2818 mat->preallocated = PETSC_TRUE; 2819 2820 a->size = oldmat->size; 2821 a->rank = oldmat->rank; 2822 a->donotstash = oldmat->donotstash; 2823 a->roworiented = oldmat->roworiented; 2824 a->rowindices = 0; 2825 a->rowvalues = 0; 2826 a->getrowactive = PETSC_FALSE; 2827 2828 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2829 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2830 2831 if (oldmat->colmap) { 2832 #if defined (PETSC_USE_CTABLE) 2833 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2834 #else 2835 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2836 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2837 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2838 #endif 2839 } else a->colmap = 0; 2840 if (oldmat->garray) { 2841 PetscInt len; 2842 len = oldmat->B->cmap->n; 2843 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2844 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2845 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2846 } else a->garray = 0; 2847 2848 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2849 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2850 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2851 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2852 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2853 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2854 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2855 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2856 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2857 *newmat = mat; 2858 PetscFunctionReturn(0); 2859 } 2860 2861 #undef __FUNCT__ 2862 #define __FUNCT__ "MatLoad_MPIAIJ" 2863 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 2864 { 2865 Mat A; 2866 PetscScalar *vals,*svals; 2867 MPI_Comm comm = ((PetscObject)viewer)->comm; 2868 MPI_Status status; 2869 PetscErrorCode ierr; 2870 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz; 2871 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2872 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2873 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2874 PetscInt cend,cstart,n,*rowners; 2875 int fd; 2876 2877 PetscFunctionBegin; 2878 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2879 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2880 if (!rank) { 2881 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2882 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2883 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2884 } 2885 2886 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2887 M = header[1]; N = header[2]; 2888 /* determine ownership of all rows */ 2889 m = M/size + ((M % size) > rank); 2890 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2891 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2892 2893 /* First process needs enough room for process with most rows */ 2894 if (!rank) { 2895 mmax = rowners[1]; 2896 for (i=2; i<size; i++) { 2897 mmax = PetscMax(mmax,rowners[i]); 2898 } 2899 } else mmax = m; 2900 2901 rowners[0] = 0; 2902 for (i=2; i<=size; i++) { 2903 rowners[i] += rowners[i-1]; 2904 } 2905 rstart = rowners[rank]; 2906 rend = rowners[rank+1]; 2907 2908 /* distribute row lengths to all processors */ 2909 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2910 if (!rank) { 2911 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2912 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2913 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2914 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2915 for (j=0; j<m; j++) { 2916 procsnz[0] += ourlens[j]; 2917 } 2918 for (i=1; i<size; i++) { 2919 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2920 /* calculate the number of nonzeros on each processor */ 2921 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2922 procsnz[i] += rowlengths[j]; 2923 } 2924 mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]); 2925 ierr = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2926 } 2927 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2928 } else { 2929 mpicnt = PetscMPIIntCast(m);CHKERRQ(ierr); 2930 ierr = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2931 } 2932 2933 if (!rank) { 2934 /* determine max buffer needed and allocate it */ 2935 maxnz = 0; 2936 for (i=0; i<size; i++) { 2937 maxnz = PetscMax(maxnz,procsnz[i]); 2938 } 2939 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2940 2941 /* read in my part of the matrix column indices */ 2942 nz = procsnz[0]; 2943 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2944 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2945 2946 /* read in every one elses and ship off */ 2947 for (i=1; i<size; i++) { 2948 nz = procsnz[i]; 2949 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2950 mpicnt = PetscMPIIntCast(nz); 2951 ierr = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2952 } 2953 ierr = PetscFree(cols);CHKERRQ(ierr); 2954 } else { 2955 /* determine buffer space needed for message */ 2956 nz = 0; 2957 for (i=0; i<m; i++) { 2958 nz += ourlens[i]; 2959 } 2960 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2961 2962 /* receive message of column indices*/ 2963 mpicnt = PetscMPIIntCast(nz);CHKERRQ(ierr); 2964 ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2965 ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr); 2966 if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 2967 else if (mpimaxnz < 0) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);} 2968 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 2969 } 2970 2971 /* determine column ownership if matrix is not square */ 2972 if (N != M) { 2973 n = N/size + ((N % size) > rank); 2974 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2975 cstart = cend - n; 2976 } else { 2977 cstart = rstart; 2978 cend = rend; 2979 n = cend - cstart; 2980 } 2981 2982 /* loop over local rows, determining number of off diagonal entries */ 2983 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2984 jj = 0; 2985 for (i=0; i<m; i++) { 2986 for (j=0; j<ourlens[i]; j++) { 2987 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2988 jj++; 2989 } 2990 } 2991 2992 /* create our matrix */ 2993 for (i=0; i<m; i++) { 2994 ourlens[i] -= offlens[i]; 2995 } 2996 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2997 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2998 ierr = MatSetType(A,type);CHKERRQ(ierr); 2999 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 3000 3001 for (i=0; i<m; i++) { 3002 ourlens[i] += offlens[i]; 3003 } 3004 3005 if (!rank) { 3006 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3007 3008 /* read in my part of the matrix numerical values */ 3009 nz = procsnz[0]; 3010 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3011 3012 /* insert into matrix */ 3013 jj = rstart; 3014 smycols = mycols; 3015 svals = vals; 3016 for (i=0; i<m; i++) { 3017 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3018 smycols += ourlens[i]; 3019 svals += ourlens[i]; 3020 jj++; 3021 } 3022 3023 /* read in other processors and ship out */ 3024 for (i=1; i<size; i++) { 3025 nz = procsnz[i]; 3026 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3027 mpicnt = PetscMPIIntCast(nz); 3028 ierr = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 3029 } 3030 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3031 } else { 3032 /* receive numeric values */ 3033 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3034 3035 /* receive message of values*/ 3036 mpicnt = PetscMPIIntCast(nz); 3037 ierr = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 3038 ierr = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr); 3039 if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 3040 else if (mpimaxnz < 0) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);} 3041 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 3042 /* insert into matrix */ 3043 jj = rstart; 3044 smycols = mycols; 3045 svals = vals; 3046 for (i=0; i<m; i++) { 3047 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3048 smycols += ourlens[i]; 3049 svals += ourlens[i]; 3050 jj++; 3051 } 3052 } 3053 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3054 ierr = PetscFree(vals);CHKERRQ(ierr); 3055 ierr = PetscFree(mycols);CHKERRQ(ierr); 3056 ierr = PetscFree(rowners);CHKERRQ(ierr); 3057 3058 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3059 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3060 *newmat = A; 3061 PetscFunctionReturn(0); 3062 } 3063 3064 #undef __FUNCT__ 3065 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3066 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3067 { 3068 PetscErrorCode ierr; 3069 IS iscol_local; 3070 PetscInt csize; 3071 3072 PetscFunctionBegin; 3073 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3074 if (call == MAT_REUSE_MATRIX) { 3075 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3076 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3077 } else { 3078 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3079 } 3080 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3081 if (call == MAT_INITIAL_MATRIX) { 3082 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3083 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 3084 } 3085 PetscFunctionReturn(0); 3086 } 3087 3088 #undef __FUNCT__ 3089 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3090 /* 3091 Not great since it makes two copies of the submatrix, first an SeqAIJ 3092 in local and then by concatenating the local matrices the end result. 3093 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3094 3095 Note: This requires a sequential iscol with all indices. 3096 */ 3097 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3098 { 3099 PetscErrorCode ierr; 3100 PetscMPIInt rank,size; 3101 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3102 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3103 Mat *local,M,Mreuse; 3104 MatScalar *vwork,*aa; 3105 MPI_Comm comm = ((PetscObject)mat)->comm; 3106 Mat_SeqAIJ *aij; 3107 3108 3109 PetscFunctionBegin; 3110 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3111 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3112 3113 if (call == MAT_REUSE_MATRIX) { 3114 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3115 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3116 local = &Mreuse; 3117 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3118 } else { 3119 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3120 Mreuse = *local; 3121 ierr = PetscFree(local);CHKERRQ(ierr); 3122 } 3123 3124 /* 3125 m - number of local rows 3126 n - number of columns (same on all processors) 3127 rstart - first row in new global matrix generated 3128 */ 3129 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3130 if (call == MAT_INITIAL_MATRIX) { 3131 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3132 ii = aij->i; 3133 jj = aij->j; 3134 3135 /* 3136 Determine the number of non-zeros in the diagonal and off-diagonal 3137 portions of the matrix in order to do correct preallocation 3138 */ 3139 3140 /* first get start and end of "diagonal" columns */ 3141 if (csize == PETSC_DECIDE) { 3142 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3143 if (mglobal == n) { /* square matrix */ 3144 nlocal = m; 3145 } else { 3146 nlocal = n/size + ((n % size) > rank); 3147 } 3148 } else { 3149 nlocal = csize; 3150 } 3151 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3152 rstart = rend - nlocal; 3153 if (rank == size - 1 && rend != n) { 3154 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3155 } 3156 3157 /* next, compute all the lengths */ 3158 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3159 olens = dlens + m; 3160 for (i=0; i<m; i++) { 3161 jend = ii[i+1] - ii[i]; 3162 olen = 0; 3163 dlen = 0; 3164 for (j=0; j<jend; j++) { 3165 if (*jj < rstart || *jj >= rend) olen++; 3166 else dlen++; 3167 jj++; 3168 } 3169 olens[i] = olen; 3170 dlens[i] = dlen; 3171 } 3172 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3173 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3174 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3175 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3176 ierr = PetscFree(dlens);CHKERRQ(ierr); 3177 } else { 3178 PetscInt ml,nl; 3179 3180 M = *newmat; 3181 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3182 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3183 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3184 /* 3185 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3186 rather than the slower MatSetValues(). 3187 */ 3188 M->was_assembled = PETSC_TRUE; 3189 M->assembled = PETSC_FALSE; 3190 } 3191 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3192 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3193 ii = aij->i; 3194 jj = aij->j; 3195 aa = aij->a; 3196 for (i=0; i<m; i++) { 3197 row = rstart + i; 3198 nz = ii[i+1] - ii[i]; 3199 cwork = jj; jj += nz; 3200 vwork = aa; aa += nz; 3201 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3202 } 3203 3204 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3205 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3206 *newmat = M; 3207 3208 /* save submatrix used in processor for next request */ 3209 if (call == MAT_INITIAL_MATRIX) { 3210 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3211 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 3212 } 3213 3214 PetscFunctionReturn(0); 3215 } 3216 3217 EXTERN_C_BEGIN 3218 #undef __FUNCT__ 3219 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3220 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3221 { 3222 PetscInt m,cstart, cend,j,nnz,i,d; 3223 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3224 const PetscInt *JJ; 3225 PetscScalar *values; 3226 PetscErrorCode ierr; 3227 3228 PetscFunctionBegin; 3229 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3230 3231 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3232 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3233 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3234 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3235 m = B->rmap->n; 3236 cstart = B->cmap->rstart; 3237 cend = B->cmap->rend; 3238 rstart = B->rmap->rstart; 3239 3240 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3241 3242 #if defined(PETSC_USE_DEBUGGING) 3243 for (i=0; i<m; i++) { 3244 nnz = Ii[i+1]- Ii[i]; 3245 JJ = J + Ii[i]; 3246 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3247 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3248 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3249 } 3250 #endif 3251 3252 for (i=0; i<m; i++) { 3253 nnz = Ii[i+1]- Ii[i]; 3254 JJ = J + Ii[i]; 3255 nnz_max = PetscMax(nnz_max,nnz); 3256 d = 0; 3257 for (j=0; j<nnz; j++) { 3258 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3259 } 3260 d_nnz[i] = d; 3261 o_nnz[i] = nnz - d; 3262 } 3263 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3264 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3265 3266 if (v) values = (PetscScalar*)v; 3267 else { 3268 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3269 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3270 } 3271 3272 for (i=0; i<m; i++) { 3273 ii = i + rstart; 3274 nnz = Ii[i+1]- Ii[i]; 3275 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3276 } 3277 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3278 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3279 3280 if (!v) { 3281 ierr = PetscFree(values);CHKERRQ(ierr); 3282 } 3283 PetscFunctionReturn(0); 3284 } 3285 EXTERN_C_END 3286 3287 #undef __FUNCT__ 3288 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3289 /*@ 3290 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3291 (the default parallel PETSc format). 3292 3293 Collective on MPI_Comm 3294 3295 Input Parameters: 3296 + B - the matrix 3297 . i - the indices into j for the start of each local row (starts with zero) 3298 . j - the column indices for each local row (starts with zero) 3299 - v - optional values in the matrix 3300 3301 Level: developer 3302 3303 Notes: 3304 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3305 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3306 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3307 3308 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3309 3310 The format which is used for the sparse matrix input, is equivalent to a 3311 row-major ordering.. i.e for the following matrix, the input data expected is 3312 as shown: 3313 3314 1 0 0 3315 2 0 3 P0 3316 ------- 3317 4 5 6 P1 3318 3319 Process0 [P0]: rows_owned=[0,1] 3320 i = {0,1,3} [size = nrow+1 = 2+1] 3321 j = {0,0,2} [size = nz = 6] 3322 v = {1,2,3} [size = nz = 6] 3323 3324 Process1 [P1]: rows_owned=[2] 3325 i = {0,3} [size = nrow+1 = 1+1] 3326 j = {0,1,2} [size = nz = 6] 3327 v = {4,5,6} [size = nz = 6] 3328 3329 .keywords: matrix, aij, compressed row, sparse, parallel 3330 3331 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3332 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3333 @*/ 3334 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3335 { 3336 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 3337 3338 PetscFunctionBegin; 3339 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3340 if (f) { 3341 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 3342 } 3343 PetscFunctionReturn(0); 3344 } 3345 3346 #undef __FUNCT__ 3347 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3348 /*@C 3349 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3350 (the default parallel PETSc format). For good matrix assembly performance 3351 the user should preallocate the matrix storage by setting the parameters 3352 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3353 performance can be increased by more than a factor of 50. 3354 3355 Collective on MPI_Comm 3356 3357 Input Parameters: 3358 + A - the matrix 3359 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3360 (same value is used for all local rows) 3361 . d_nnz - array containing the number of nonzeros in the various rows of the 3362 DIAGONAL portion of the local submatrix (possibly different for each row) 3363 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3364 The size of this array is equal to the number of local rows, i.e 'm'. 3365 You must leave room for the diagonal entry even if it is zero. 3366 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3367 submatrix (same value is used for all local rows). 3368 - o_nnz - array containing the number of nonzeros in the various rows of the 3369 OFF-DIAGONAL portion of the local submatrix (possibly different for 3370 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3371 structure. The size of this array is equal to the number 3372 of local rows, i.e 'm'. 3373 3374 If the *_nnz parameter is given then the *_nz parameter is ignored 3375 3376 The AIJ format (also called the Yale sparse matrix format or 3377 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3378 storage. The stored row and column indices begin with zero. See the users manual for details. 3379 3380 The parallel matrix is partitioned such that the first m0 rows belong to 3381 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3382 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3383 3384 The DIAGONAL portion of the local submatrix of a processor can be defined 3385 as the submatrix which is obtained by extraction the part corresponding 3386 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 3387 first row that belongs to the processor, and r2 is the last row belonging 3388 to the this processor. This is a square mxm matrix. The remaining portion 3389 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 3390 3391 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3392 3393 You can call MatGetInfo() to get information on how effective the preallocation was; 3394 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3395 You can also run with the option -info and look for messages with the string 3396 malloc in them to see if additional memory allocation was needed. 3397 3398 Example usage: 3399 3400 Consider the following 8x8 matrix with 34 non-zero values, that is 3401 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3402 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3403 as follows: 3404 3405 .vb 3406 1 2 0 | 0 3 0 | 0 4 3407 Proc0 0 5 6 | 7 0 0 | 8 0 3408 9 0 10 | 11 0 0 | 12 0 3409 ------------------------------------- 3410 13 0 14 | 15 16 17 | 0 0 3411 Proc1 0 18 0 | 19 20 21 | 0 0 3412 0 0 0 | 22 23 0 | 24 0 3413 ------------------------------------- 3414 Proc2 25 26 27 | 0 0 28 | 29 0 3415 30 0 0 | 31 32 33 | 0 34 3416 .ve 3417 3418 This can be represented as a collection of submatrices as: 3419 3420 .vb 3421 A B C 3422 D E F 3423 G H I 3424 .ve 3425 3426 Where the submatrices A,B,C are owned by proc0, D,E,F are 3427 owned by proc1, G,H,I are owned by proc2. 3428 3429 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3430 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3431 The 'M','N' parameters are 8,8, and have the same values on all procs. 3432 3433 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3434 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3435 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3436 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3437 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3438 matrix, ans [DF] as another SeqAIJ matrix. 3439 3440 When d_nz, o_nz parameters are specified, d_nz storage elements are 3441 allocated for every row of the local diagonal submatrix, and o_nz 3442 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3443 One way to choose d_nz and o_nz is to use the max nonzerors per local 3444 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3445 In this case, the values of d_nz,o_nz are: 3446 .vb 3447 proc0 : dnz = 2, o_nz = 2 3448 proc1 : dnz = 3, o_nz = 2 3449 proc2 : dnz = 1, o_nz = 4 3450 .ve 3451 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3452 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3453 for proc3. i.e we are using 12+15+10=37 storage locations to store 3454 34 values. 3455 3456 When d_nnz, o_nnz parameters are specified, the storage is specified 3457 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3458 In the above case the values for d_nnz,o_nnz are: 3459 .vb 3460 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3461 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3462 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3463 .ve 3464 Here the space allocated is sum of all the above values i.e 34, and 3465 hence pre-allocation is perfect. 3466 3467 Level: intermediate 3468 3469 .keywords: matrix, aij, compressed row, sparse, parallel 3470 3471 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3472 MPIAIJ, MatGetInfo() 3473 @*/ 3474 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3475 { 3476 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3477 3478 PetscFunctionBegin; 3479 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3480 if (f) { 3481 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3482 } 3483 PetscFunctionReturn(0); 3484 } 3485 3486 #undef __FUNCT__ 3487 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3488 /*@ 3489 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3490 CSR format the local rows. 3491 3492 Collective on MPI_Comm 3493 3494 Input Parameters: 3495 + comm - MPI communicator 3496 . m - number of local rows (Cannot be PETSC_DECIDE) 3497 . n - This value should be the same as the local size used in creating the 3498 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3499 calculated if N is given) For square matrices n is almost always m. 3500 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3501 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3502 . i - row indices 3503 . j - column indices 3504 - a - matrix values 3505 3506 Output Parameter: 3507 . mat - the matrix 3508 3509 Level: intermediate 3510 3511 Notes: 3512 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3513 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3514 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3515 3516 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3517 3518 The format which is used for the sparse matrix input, is equivalent to a 3519 row-major ordering.. i.e for the following matrix, the input data expected is 3520 as shown: 3521 3522 1 0 0 3523 2 0 3 P0 3524 ------- 3525 4 5 6 P1 3526 3527 Process0 [P0]: rows_owned=[0,1] 3528 i = {0,1,3} [size = nrow+1 = 2+1] 3529 j = {0,0,2} [size = nz = 6] 3530 v = {1,2,3} [size = nz = 6] 3531 3532 Process1 [P1]: rows_owned=[2] 3533 i = {0,3} [size = nrow+1 = 1+1] 3534 j = {0,1,2} [size = nz = 6] 3535 v = {4,5,6} [size = nz = 6] 3536 3537 .keywords: matrix, aij, compressed row, sparse, parallel 3538 3539 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3540 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3541 @*/ 3542 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3543 { 3544 PetscErrorCode ierr; 3545 3546 PetscFunctionBegin; 3547 if (i[0]) { 3548 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3549 } 3550 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3551 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3552 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3553 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3554 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3555 PetscFunctionReturn(0); 3556 } 3557 3558 #undef __FUNCT__ 3559 #define __FUNCT__ "MatCreateMPIAIJ" 3560 /*@C 3561 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3562 (the default parallel PETSc format). For good matrix assembly performance 3563 the user should preallocate the matrix storage by setting the parameters 3564 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3565 performance can be increased by more than a factor of 50. 3566 3567 Collective on MPI_Comm 3568 3569 Input Parameters: 3570 + comm - MPI communicator 3571 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3572 This value should be the same as the local size used in creating the 3573 y vector for the matrix-vector product y = Ax. 3574 . n - This value should be the same as the local size used in creating the 3575 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3576 calculated if N is given) For square matrices n is almost always m. 3577 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3578 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3579 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3580 (same value is used for all local rows) 3581 . d_nnz - array containing the number of nonzeros in the various rows of the 3582 DIAGONAL portion of the local submatrix (possibly different for each row) 3583 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3584 The size of this array is equal to the number of local rows, i.e 'm'. 3585 You must leave room for the diagonal entry even if it is zero. 3586 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3587 submatrix (same value is used for all local rows). 3588 - o_nnz - array containing the number of nonzeros in the various rows of the 3589 OFF-DIAGONAL portion of the local submatrix (possibly different for 3590 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3591 structure. The size of this array is equal to the number 3592 of local rows, i.e 'm'. 3593 3594 Output Parameter: 3595 . A - the matrix 3596 3597 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3598 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3599 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3600 3601 Notes: 3602 If the *_nnz parameter is given then the *_nz parameter is ignored 3603 3604 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3605 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3606 storage requirements for this matrix. 3607 3608 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3609 processor than it must be used on all processors that share the object for 3610 that argument. 3611 3612 The user MUST specify either the local or global matrix dimensions 3613 (possibly both). 3614 3615 The parallel matrix is partitioned across processors such that the 3616 first m0 rows belong to process 0, the next m1 rows belong to 3617 process 1, the next m2 rows belong to process 2 etc.. where 3618 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3619 values corresponding to [m x N] submatrix. 3620 3621 The columns are logically partitioned with the n0 columns belonging 3622 to 0th partition, the next n1 columns belonging to the next 3623 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 3624 3625 The DIAGONAL portion of the local submatrix on any given processor 3626 is the submatrix corresponding to the rows and columns m,n 3627 corresponding to the given processor. i.e diagonal matrix on 3628 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3629 etc. The remaining portion of the local submatrix [m x (N-n)] 3630 constitute the OFF-DIAGONAL portion. The example below better 3631 illustrates this concept. 3632 3633 For a square global matrix we define each processor's diagonal portion 3634 to be its local rows and the corresponding columns (a square submatrix); 3635 each processor's off-diagonal portion encompasses the remainder of the 3636 local matrix (a rectangular submatrix). 3637 3638 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3639 3640 When calling this routine with a single process communicator, a matrix of 3641 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3642 type of communicator, use the construction mechanism: 3643 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3644 3645 By default, this format uses inodes (identical nodes) when possible. 3646 We search for consecutive rows with the same nonzero structure, thereby 3647 reusing matrix information to achieve increased efficiency. 3648 3649 Options Database Keys: 3650 + -mat_no_inode - Do not use inodes 3651 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3652 - -mat_aij_oneindex - Internally use indexing starting at 1 3653 rather than 0. Note that when calling MatSetValues(), 3654 the user still MUST index entries starting at 0! 3655 3656 3657 Example usage: 3658 3659 Consider the following 8x8 matrix with 34 non-zero values, that is 3660 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3661 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3662 as follows: 3663 3664 .vb 3665 1 2 0 | 0 3 0 | 0 4 3666 Proc0 0 5 6 | 7 0 0 | 8 0 3667 9 0 10 | 11 0 0 | 12 0 3668 ------------------------------------- 3669 13 0 14 | 15 16 17 | 0 0 3670 Proc1 0 18 0 | 19 20 21 | 0 0 3671 0 0 0 | 22 23 0 | 24 0 3672 ------------------------------------- 3673 Proc2 25 26 27 | 0 0 28 | 29 0 3674 30 0 0 | 31 32 33 | 0 34 3675 .ve 3676 3677 This can be represented as a collection of submatrices as: 3678 3679 .vb 3680 A B C 3681 D E F 3682 G H I 3683 .ve 3684 3685 Where the submatrices A,B,C are owned by proc0, D,E,F are 3686 owned by proc1, G,H,I are owned by proc2. 3687 3688 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3689 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3690 The 'M','N' parameters are 8,8, and have the same values on all procs. 3691 3692 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3693 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3694 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3695 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3696 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3697 matrix, ans [DF] as another SeqAIJ matrix. 3698 3699 When d_nz, o_nz parameters are specified, d_nz storage elements are 3700 allocated for every row of the local diagonal submatrix, and o_nz 3701 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3702 One way to choose d_nz and o_nz is to use the max nonzerors per local 3703 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3704 In this case, the values of d_nz,o_nz are: 3705 .vb 3706 proc0 : dnz = 2, o_nz = 2 3707 proc1 : dnz = 3, o_nz = 2 3708 proc2 : dnz = 1, o_nz = 4 3709 .ve 3710 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3711 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3712 for proc3. i.e we are using 12+15+10=37 storage locations to store 3713 34 values. 3714 3715 When d_nnz, o_nnz parameters are specified, the storage is specified 3716 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3717 In the above case the values for d_nnz,o_nnz are: 3718 .vb 3719 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3720 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3721 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3722 .ve 3723 Here the space allocated is sum of all the above values i.e 34, and 3724 hence pre-allocation is perfect. 3725 3726 Level: intermediate 3727 3728 .keywords: matrix, aij, compressed row, sparse, parallel 3729 3730 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3731 MPIAIJ, MatCreateMPIAIJWithArrays() 3732 @*/ 3733 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3734 { 3735 PetscErrorCode ierr; 3736 PetscMPIInt size; 3737 3738 PetscFunctionBegin; 3739 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3740 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3741 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3742 if (size > 1) { 3743 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3744 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3745 } else { 3746 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3747 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3748 } 3749 PetscFunctionReturn(0); 3750 } 3751 3752 #undef __FUNCT__ 3753 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3754 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3755 { 3756 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3757 3758 PetscFunctionBegin; 3759 *Ad = a->A; 3760 *Ao = a->B; 3761 *colmap = a->garray; 3762 PetscFunctionReturn(0); 3763 } 3764 3765 #undef __FUNCT__ 3766 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3767 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3768 { 3769 PetscErrorCode ierr; 3770 PetscInt i; 3771 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3772 3773 PetscFunctionBegin; 3774 if (coloring->ctype == IS_COLORING_GLOBAL) { 3775 ISColoringValue *allcolors,*colors; 3776 ISColoring ocoloring; 3777 3778 /* set coloring for diagonal portion */ 3779 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3780 3781 /* set coloring for off-diagonal portion */ 3782 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3783 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3784 for (i=0; i<a->B->cmap->n; i++) { 3785 colors[i] = allcolors[a->garray[i]]; 3786 } 3787 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3788 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3789 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3790 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3791 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3792 ISColoringValue *colors; 3793 PetscInt *larray; 3794 ISColoring ocoloring; 3795 3796 /* set coloring for diagonal portion */ 3797 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3798 for (i=0; i<a->A->cmap->n; i++) { 3799 larray[i] = i + A->cmap->rstart; 3800 } 3801 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3802 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3803 for (i=0; i<a->A->cmap->n; i++) { 3804 colors[i] = coloring->colors[larray[i]]; 3805 } 3806 ierr = PetscFree(larray);CHKERRQ(ierr); 3807 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3808 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3809 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3810 3811 /* set coloring for off-diagonal portion */ 3812 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3813 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3814 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3815 for (i=0; i<a->B->cmap->n; i++) { 3816 colors[i] = coloring->colors[larray[i]]; 3817 } 3818 ierr = PetscFree(larray);CHKERRQ(ierr); 3819 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3820 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3821 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3822 } else { 3823 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3824 } 3825 3826 PetscFunctionReturn(0); 3827 } 3828 3829 #if defined(PETSC_HAVE_ADIC) 3830 #undef __FUNCT__ 3831 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3832 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3833 { 3834 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3835 PetscErrorCode ierr; 3836 3837 PetscFunctionBegin; 3838 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3839 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3840 PetscFunctionReturn(0); 3841 } 3842 #endif 3843 3844 #undef __FUNCT__ 3845 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3846 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3847 { 3848 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3849 PetscErrorCode ierr; 3850 3851 PetscFunctionBegin; 3852 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3853 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3854 PetscFunctionReturn(0); 3855 } 3856 3857 #undef __FUNCT__ 3858 #define __FUNCT__ "MatMerge" 3859 /*@ 3860 MatMerge - Creates a single large PETSc matrix by concatinating sequential 3861 matrices from each processor 3862 3863 Collective on MPI_Comm 3864 3865 Input Parameters: 3866 + comm - the communicators the parallel matrix will live on 3867 . inmat - the input sequential matrices 3868 . n - number of local columns (or PETSC_DECIDE) 3869 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3870 3871 Output Parameter: 3872 . outmat - the parallel matrix generated 3873 3874 Level: advanced 3875 3876 Notes: The number of columns of the matrix in EACH processor MUST be the same. 3877 3878 @*/ 3879 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3880 { 3881 PetscErrorCode ierr; 3882 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3883 PetscInt *indx; 3884 PetscScalar *values; 3885 3886 PetscFunctionBegin; 3887 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3888 if (scall == MAT_INITIAL_MATRIX){ 3889 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3890 if (n == PETSC_DECIDE){ 3891 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3892 } 3893 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3894 rstart -= m; 3895 3896 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3897 for (i=0;i<m;i++) { 3898 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3899 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3900 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3901 } 3902 /* This routine will ONLY return MPIAIJ type matrix */ 3903 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3904 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3905 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3906 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3907 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3908 3909 } else if (scall == MAT_REUSE_MATRIX){ 3910 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3911 } else { 3912 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3913 } 3914 3915 for (i=0;i<m;i++) { 3916 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3917 Ii = i + rstart; 3918 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3919 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3920 } 3921 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3922 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3923 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3924 3925 PetscFunctionReturn(0); 3926 } 3927 3928 #undef __FUNCT__ 3929 #define __FUNCT__ "MatFileSplit" 3930 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3931 { 3932 PetscErrorCode ierr; 3933 PetscMPIInt rank; 3934 PetscInt m,N,i,rstart,nnz; 3935 size_t len; 3936 const PetscInt *indx; 3937 PetscViewer out; 3938 char *name; 3939 Mat B; 3940 const PetscScalar *values; 3941 3942 PetscFunctionBegin; 3943 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3944 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3945 /* Should this be the type of the diagonal block of A? */ 3946 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3947 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3948 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3949 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3950 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3951 for (i=0;i<m;i++) { 3952 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3953 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3954 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3955 } 3956 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3957 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3958 3959 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 3960 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3961 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3962 sprintf(name,"%s.%d",outfile,rank); 3963 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3964 ierr = PetscFree(name); 3965 ierr = MatView(B,out);CHKERRQ(ierr); 3966 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3967 ierr = MatDestroy(B);CHKERRQ(ierr); 3968 PetscFunctionReturn(0); 3969 } 3970 3971 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3972 #undef __FUNCT__ 3973 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3974 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3975 { 3976 PetscErrorCode ierr; 3977 Mat_Merge_SeqsToMPI *merge; 3978 PetscContainer container; 3979 3980 PetscFunctionBegin; 3981 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3982 if (container) { 3983 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3984 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3985 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3986 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3987 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3988 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3989 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 3990 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3991 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 3992 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3993 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3994 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3995 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3996 ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr); 3997 3998 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 3999 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4000 } 4001 ierr = PetscFree(merge);CHKERRQ(ierr); 4002 4003 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4004 PetscFunctionReturn(0); 4005 } 4006 4007 #include "../src/mat/utils/freespace.h" 4008 #include "petscbt.h" 4009 4010 #undef __FUNCT__ 4011 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4012 /*@C 4013 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4014 matrices from each processor 4015 4016 Collective on MPI_Comm 4017 4018 Input Parameters: 4019 + comm - the communicators the parallel matrix will live on 4020 . seqmat - the input sequential matrices 4021 . m - number of local rows (or PETSC_DECIDE) 4022 . n - number of local columns (or PETSC_DECIDE) 4023 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4024 4025 Output Parameter: 4026 . mpimat - the parallel matrix generated 4027 4028 Level: advanced 4029 4030 Notes: 4031 The dimensions of the sequential matrix in each processor MUST be the same. 4032 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4033 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4034 @*/ 4035 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4036 { 4037 PetscErrorCode ierr; 4038 MPI_Comm comm=((PetscObject)mpimat)->comm; 4039 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4040 PetscMPIInt size,rank,taga,*len_s; 4041 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4042 PetscInt proc,m; 4043 PetscInt **buf_ri,**buf_rj; 4044 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4045 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4046 MPI_Request *s_waits,*r_waits; 4047 MPI_Status *status; 4048 MatScalar *aa=a->a; 4049 MatScalar **abuf_r,*ba_i; 4050 Mat_Merge_SeqsToMPI *merge; 4051 PetscContainer container; 4052 4053 PetscFunctionBegin; 4054 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4055 4056 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4057 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4058 4059 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4060 if (container) { 4061 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4062 } 4063 bi = merge->bi; 4064 bj = merge->bj; 4065 buf_ri = merge->buf_ri; 4066 buf_rj = merge->buf_rj; 4067 4068 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4069 owners = merge->rowmap->range; 4070 len_s = merge->len_s; 4071 4072 /* send and recv matrix values */ 4073 /*-----------------------------*/ 4074 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4075 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4076 4077 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4078 for (proc=0,k=0; proc<size; proc++){ 4079 if (!len_s[proc]) continue; 4080 i = owners[proc]; 4081 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4082 k++; 4083 } 4084 4085 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4086 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4087 ierr = PetscFree(status);CHKERRQ(ierr); 4088 4089 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4090 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4091 4092 /* insert mat values of mpimat */ 4093 /*----------------------------*/ 4094 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4095 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4096 4097 for (k=0; k<merge->nrecv; k++){ 4098 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4099 nrows = *(buf_ri_k[k]); 4100 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4101 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4102 } 4103 4104 /* set values of ba */ 4105 m = merge->rowmap->n; 4106 for (i=0; i<m; i++) { 4107 arow = owners[rank] + i; 4108 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4109 bnzi = bi[i+1] - bi[i]; 4110 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4111 4112 /* add local non-zero vals of this proc's seqmat into ba */ 4113 anzi = ai[arow+1] - ai[arow]; 4114 aj = a->j + ai[arow]; 4115 aa = a->a + ai[arow]; 4116 nextaj = 0; 4117 for (j=0; nextaj<anzi; j++){ 4118 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4119 ba_i[j] += aa[nextaj++]; 4120 } 4121 } 4122 4123 /* add received vals into ba */ 4124 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4125 /* i-th row */ 4126 if (i == *nextrow[k]) { 4127 anzi = *(nextai[k]+1) - *nextai[k]; 4128 aj = buf_rj[k] + *(nextai[k]); 4129 aa = abuf_r[k] + *(nextai[k]); 4130 nextaj = 0; 4131 for (j=0; nextaj<anzi; j++){ 4132 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4133 ba_i[j] += aa[nextaj++]; 4134 } 4135 } 4136 nextrow[k]++; nextai[k]++; 4137 } 4138 } 4139 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4140 } 4141 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4142 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4143 4144 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4145 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4146 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4147 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4148 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4149 PetscFunctionReturn(0); 4150 } 4151 4152 #undef __FUNCT__ 4153 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4154 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4155 { 4156 PetscErrorCode ierr; 4157 Mat B_mpi; 4158 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4159 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4160 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4161 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4162 PetscInt len,proc,*dnz,*onz; 4163 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4164 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4165 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4166 MPI_Status *status; 4167 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4168 PetscBT lnkbt; 4169 Mat_Merge_SeqsToMPI *merge; 4170 PetscContainer container; 4171 4172 PetscFunctionBegin; 4173 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4174 4175 /* make sure it is a PETSc comm */ 4176 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4177 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4178 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4179 4180 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4181 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4182 4183 /* determine row ownership */ 4184 /*---------------------------------------------------------*/ 4185 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4186 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4187 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4188 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4189 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4190 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4191 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4192 4193 m = merge->rowmap->n; 4194 M = merge->rowmap->N; 4195 owners = merge->rowmap->range; 4196 4197 /* determine the number of messages to send, their lengths */ 4198 /*---------------------------------------------------------*/ 4199 len_s = merge->len_s; 4200 4201 len = 0; /* length of buf_si[] */ 4202 merge->nsend = 0; 4203 for (proc=0; proc<size; proc++){ 4204 len_si[proc] = 0; 4205 if (proc == rank){ 4206 len_s[proc] = 0; 4207 } else { 4208 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4209 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4210 } 4211 if (len_s[proc]) { 4212 merge->nsend++; 4213 nrows = 0; 4214 for (i=owners[proc]; i<owners[proc+1]; i++){ 4215 if (ai[i+1] > ai[i]) nrows++; 4216 } 4217 len_si[proc] = 2*(nrows+1); 4218 len += len_si[proc]; 4219 } 4220 } 4221 4222 /* determine the number and length of messages to receive for ij-structure */ 4223 /*-------------------------------------------------------------------------*/ 4224 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4225 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4226 4227 /* post the Irecv of j-structure */ 4228 /*-------------------------------*/ 4229 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4230 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4231 4232 /* post the Isend of j-structure */ 4233 /*--------------------------------*/ 4234 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4235 4236 for (proc=0, k=0; proc<size; proc++){ 4237 if (!len_s[proc]) continue; 4238 i = owners[proc]; 4239 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4240 k++; 4241 } 4242 4243 /* receives and sends of j-structure are complete */ 4244 /*------------------------------------------------*/ 4245 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4246 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4247 4248 /* send and recv i-structure */ 4249 /*---------------------------*/ 4250 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4251 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4252 4253 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4254 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4255 for (proc=0,k=0; proc<size; proc++){ 4256 if (!len_s[proc]) continue; 4257 /* form outgoing message for i-structure: 4258 buf_si[0]: nrows to be sent 4259 [1:nrows]: row index (global) 4260 [nrows+1:2*nrows+1]: i-structure index 4261 */ 4262 /*-------------------------------------------*/ 4263 nrows = len_si[proc]/2 - 1; 4264 buf_si_i = buf_si + nrows+1; 4265 buf_si[0] = nrows; 4266 buf_si_i[0] = 0; 4267 nrows = 0; 4268 for (i=owners[proc]; i<owners[proc+1]; i++){ 4269 anzi = ai[i+1] - ai[i]; 4270 if (anzi) { 4271 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4272 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4273 nrows++; 4274 } 4275 } 4276 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4277 k++; 4278 buf_si += len_si[proc]; 4279 } 4280 4281 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4282 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4283 4284 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4285 for (i=0; i<merge->nrecv; i++){ 4286 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4287 } 4288 4289 ierr = PetscFree(len_si);CHKERRQ(ierr); 4290 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4291 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4292 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4293 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4294 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4295 ierr = PetscFree(status);CHKERRQ(ierr); 4296 4297 /* compute a local seq matrix in each processor */ 4298 /*----------------------------------------------*/ 4299 /* allocate bi array and free space for accumulating nonzero column info */ 4300 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4301 bi[0] = 0; 4302 4303 /* create and initialize a linked list */ 4304 nlnk = N+1; 4305 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4306 4307 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4308 len = 0; 4309 len = ai[owners[rank+1]] - ai[owners[rank]]; 4310 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4311 current_space = free_space; 4312 4313 /* determine symbolic info for each local row */ 4314 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4315 4316 for (k=0; k<merge->nrecv; k++){ 4317 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4318 nrows = *buf_ri_k[k]; 4319 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4320 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4321 } 4322 4323 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4324 len = 0; 4325 for (i=0;i<m;i++) { 4326 bnzi = 0; 4327 /* add local non-zero cols of this proc's seqmat into lnk */ 4328 arow = owners[rank] + i; 4329 anzi = ai[arow+1] - ai[arow]; 4330 aj = a->j + ai[arow]; 4331 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4332 bnzi += nlnk; 4333 /* add received col data into lnk */ 4334 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4335 if (i == *nextrow[k]) { /* i-th row */ 4336 anzi = *(nextai[k]+1) - *nextai[k]; 4337 aj = buf_rj[k] + *nextai[k]; 4338 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4339 bnzi += nlnk; 4340 nextrow[k]++; nextai[k]++; 4341 } 4342 } 4343 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4344 4345 /* if free space is not available, make more free space */ 4346 if (current_space->local_remaining<bnzi) { 4347 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4348 nspacedouble++; 4349 } 4350 /* copy data into free space, then initialize lnk */ 4351 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4352 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4353 4354 current_space->array += bnzi; 4355 current_space->local_used += bnzi; 4356 current_space->local_remaining -= bnzi; 4357 4358 bi[i+1] = bi[i] + bnzi; 4359 } 4360 4361 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4362 4363 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4364 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4365 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4366 4367 /* create symbolic parallel matrix B_mpi */ 4368 /*---------------------------------------*/ 4369 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4370 if (n==PETSC_DECIDE) { 4371 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4372 } else { 4373 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4374 } 4375 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4376 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4377 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4378 4379 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4380 B_mpi->assembled = PETSC_FALSE; 4381 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4382 merge->bi = bi; 4383 merge->bj = bj; 4384 merge->buf_ri = buf_ri; 4385 merge->buf_rj = buf_rj; 4386 merge->coi = PETSC_NULL; 4387 merge->coj = PETSC_NULL; 4388 merge->owners_co = PETSC_NULL; 4389 4390 /* attach the supporting struct to B_mpi for reuse */ 4391 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4392 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4393 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4394 *mpimat = B_mpi; 4395 4396 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4397 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4398 PetscFunctionReturn(0); 4399 } 4400 4401 #undef __FUNCT__ 4402 #define __FUNCT__ "MatMerge_SeqsToMPI" 4403 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4404 { 4405 PetscErrorCode ierr; 4406 4407 PetscFunctionBegin; 4408 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4409 if (scall == MAT_INITIAL_MATRIX){ 4410 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4411 } 4412 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4413 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4414 PetscFunctionReturn(0); 4415 } 4416 4417 #undef __FUNCT__ 4418 #define __FUNCT__ "MatGetLocalMat" 4419 /*@ 4420 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4421 4422 Not Collective 4423 4424 Input Parameters: 4425 + A - the matrix 4426 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4427 4428 Output Parameter: 4429 . A_loc - the local sequential matrix generated 4430 4431 Level: developer 4432 4433 @*/ 4434 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4435 { 4436 PetscErrorCode ierr; 4437 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4438 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4439 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4440 MatScalar *aa=a->a,*ba=b->a,*cam; 4441 PetscScalar *ca; 4442 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4443 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4444 4445 PetscFunctionBegin; 4446 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4447 if (scall == MAT_INITIAL_MATRIX){ 4448 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4449 ci[0] = 0; 4450 for (i=0; i<am; i++){ 4451 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4452 } 4453 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4454 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4455 k = 0; 4456 for (i=0; i<am; i++) { 4457 ncols_o = bi[i+1] - bi[i]; 4458 ncols_d = ai[i+1] - ai[i]; 4459 /* off-diagonal portion of A */ 4460 for (jo=0; jo<ncols_o; jo++) { 4461 col = cmap[*bj]; 4462 if (col >= cstart) break; 4463 cj[k] = col; bj++; 4464 ca[k++] = *ba++; 4465 } 4466 /* diagonal portion of A */ 4467 for (j=0; j<ncols_d; j++) { 4468 cj[k] = cstart + *aj++; 4469 ca[k++] = *aa++; 4470 } 4471 /* off-diagonal portion of A */ 4472 for (j=jo; j<ncols_o; j++) { 4473 cj[k] = cmap[*bj++]; 4474 ca[k++] = *ba++; 4475 } 4476 } 4477 /* put together the new matrix */ 4478 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4479 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4480 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4481 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4482 mat->free_a = PETSC_TRUE; 4483 mat->free_ij = PETSC_TRUE; 4484 mat->nonew = 0; 4485 } else if (scall == MAT_REUSE_MATRIX){ 4486 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4487 ci = mat->i; cj = mat->j; cam = mat->a; 4488 for (i=0; i<am; i++) { 4489 /* off-diagonal portion of A */ 4490 ncols_o = bi[i+1] - bi[i]; 4491 for (jo=0; jo<ncols_o; jo++) { 4492 col = cmap[*bj]; 4493 if (col >= cstart) break; 4494 *cam++ = *ba++; bj++; 4495 } 4496 /* diagonal portion of A */ 4497 ncols_d = ai[i+1] - ai[i]; 4498 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4499 /* off-diagonal portion of A */ 4500 for (j=jo; j<ncols_o; j++) { 4501 *cam++ = *ba++; bj++; 4502 } 4503 } 4504 } else { 4505 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4506 } 4507 4508 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4509 PetscFunctionReturn(0); 4510 } 4511 4512 #undef __FUNCT__ 4513 #define __FUNCT__ "MatGetLocalMatCondensed" 4514 /*@C 4515 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4516 4517 Not Collective 4518 4519 Input Parameters: 4520 + A - the matrix 4521 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4522 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4523 4524 Output Parameter: 4525 . A_loc - the local sequential matrix generated 4526 4527 Level: developer 4528 4529 @*/ 4530 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4531 { 4532 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4533 PetscErrorCode ierr; 4534 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4535 IS isrowa,iscola; 4536 Mat *aloc; 4537 4538 PetscFunctionBegin; 4539 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4540 if (!row){ 4541 start = A->rmap->rstart; end = A->rmap->rend; 4542 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4543 } else { 4544 isrowa = *row; 4545 } 4546 if (!col){ 4547 start = A->cmap->rstart; 4548 cmap = a->garray; 4549 nzA = a->A->cmap->n; 4550 nzB = a->B->cmap->n; 4551 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4552 ncols = 0; 4553 for (i=0; i<nzB; i++) { 4554 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4555 else break; 4556 } 4557 imark = i; 4558 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4559 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4560 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4561 ierr = PetscFree(idx);CHKERRQ(ierr); 4562 } else { 4563 iscola = *col; 4564 } 4565 if (scall != MAT_INITIAL_MATRIX){ 4566 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4567 aloc[0] = *A_loc; 4568 } 4569 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4570 *A_loc = aloc[0]; 4571 ierr = PetscFree(aloc);CHKERRQ(ierr); 4572 if (!row){ 4573 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4574 } 4575 if (!col){ 4576 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4577 } 4578 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4579 PetscFunctionReturn(0); 4580 } 4581 4582 #undef __FUNCT__ 4583 #define __FUNCT__ "MatGetBrowsOfAcols" 4584 /*@C 4585 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4586 4587 Collective on Mat 4588 4589 Input Parameters: 4590 + A,B - the matrices in mpiaij format 4591 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4592 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4593 4594 Output Parameter: 4595 + rowb, colb - index sets of rows and columns of B to extract 4596 . brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows 4597 - B_seq - the sequential matrix generated 4598 4599 Level: developer 4600 4601 @*/ 4602 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4603 { 4604 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4605 PetscErrorCode ierr; 4606 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4607 IS isrowb,iscolb; 4608 Mat *bseq; 4609 4610 PetscFunctionBegin; 4611 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4612 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4613 } 4614 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4615 4616 if (scall == MAT_INITIAL_MATRIX){ 4617 start = A->cmap->rstart; 4618 cmap = a->garray; 4619 nzA = a->A->cmap->n; 4620 nzB = a->B->cmap->n; 4621 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4622 ncols = 0; 4623 for (i=0; i<nzB; i++) { /* row < local row index */ 4624 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4625 else break; 4626 } 4627 imark = i; 4628 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4629 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4630 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4631 ierr = PetscFree(idx);CHKERRQ(ierr); 4632 *brstart = imark; 4633 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4634 } else { 4635 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4636 isrowb = *rowb; iscolb = *colb; 4637 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4638 bseq[0] = *B_seq; 4639 } 4640 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4641 *B_seq = bseq[0]; 4642 ierr = PetscFree(bseq);CHKERRQ(ierr); 4643 if (!rowb){ 4644 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4645 } else { 4646 *rowb = isrowb; 4647 } 4648 if (!colb){ 4649 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4650 } else { 4651 *colb = iscolb; 4652 } 4653 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4654 PetscFunctionReturn(0); 4655 } 4656 4657 #undef __FUNCT__ 4658 #define __FUNCT__ "MatGetBrowsOfAoCols" 4659 /*@C 4660 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4661 of the OFF-DIAGONAL portion of local A 4662 4663 Collective on Mat 4664 4665 Input Parameters: 4666 + A,B - the matrices in mpiaij format 4667 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4668 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4669 . startsj_r - similar to startsj for receives 4670 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4671 4672 Output Parameter: 4673 + B_oth - the sequential matrix generated 4674 4675 Level: developer 4676 4677 @*/ 4678 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 4679 { 4680 VecScatter_MPI_General *gen_to,*gen_from; 4681 PetscErrorCode ierr; 4682 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4683 Mat_SeqAIJ *b_oth; 4684 VecScatter ctx=a->Mvctx; 4685 MPI_Comm comm=((PetscObject)ctx)->comm; 4686 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4687 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4688 PetscScalar *rvalues,*svalues; 4689 MatScalar *b_otha,*bufa,*bufA; 4690 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4691 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4692 MPI_Status *sstatus,rstatus; 4693 PetscMPIInt jj; 4694 PetscInt *cols,sbs,rbs; 4695 PetscScalar *vals; 4696 4697 PetscFunctionBegin; 4698 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4699 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4700 } 4701 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4702 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4703 4704 gen_to = (VecScatter_MPI_General*)ctx->todata; 4705 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4706 rvalues = gen_from->values; /* holds the length of receiving row */ 4707 svalues = gen_to->values; /* holds the length of sending row */ 4708 nrecvs = gen_from->n; 4709 nsends = gen_to->n; 4710 4711 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4712 srow = gen_to->indices; /* local row index to be sent */ 4713 sstarts = gen_to->starts; 4714 sprocs = gen_to->procs; 4715 sstatus = gen_to->sstatus; 4716 sbs = gen_to->bs; 4717 rstarts = gen_from->starts; 4718 rprocs = gen_from->procs; 4719 rbs = gen_from->bs; 4720 4721 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4722 if (scall == MAT_INITIAL_MATRIX){ 4723 /* i-array */ 4724 /*---------*/ 4725 /* post receives */ 4726 for (i=0; i<nrecvs; i++){ 4727 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4728 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4729 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4730 } 4731 4732 /* pack the outgoing message */ 4733 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 4734 sstartsj[0] = 0; rstartsj[0] = 0; 4735 len = 0; /* total length of j or a array to be sent */ 4736 k = 0; 4737 for (i=0; i<nsends; i++){ 4738 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4739 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4740 for (j=0; j<nrows; j++) { 4741 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4742 for (l=0; l<sbs; l++){ 4743 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4744 rowlen[j*sbs+l] = ncols; 4745 len += ncols; 4746 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4747 } 4748 k++; 4749 } 4750 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4751 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4752 } 4753 /* recvs and sends of i-array are completed */ 4754 i = nrecvs; 4755 while (i--) { 4756 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4757 } 4758 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4759 4760 /* allocate buffers for sending j and a arrays */ 4761 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4762 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4763 4764 /* create i-array of B_oth */ 4765 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4766 b_othi[0] = 0; 4767 len = 0; /* total length of j or a array to be received */ 4768 k = 0; 4769 for (i=0; i<nrecvs; i++){ 4770 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4771 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 4772 for (j=0; j<nrows; j++) { 4773 b_othi[k+1] = b_othi[k] + rowlen[j]; 4774 len += rowlen[j]; k++; 4775 } 4776 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4777 } 4778 4779 /* allocate space for j and a arrrays of B_oth */ 4780 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4781 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 4782 4783 /* j-array */ 4784 /*---------*/ 4785 /* post receives of j-array */ 4786 for (i=0; i<nrecvs; i++){ 4787 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4788 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4789 } 4790 4791 /* pack the outgoing message j-array */ 4792 k = 0; 4793 for (i=0; i<nsends; i++){ 4794 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4795 bufJ = bufj+sstartsj[i]; 4796 for (j=0; j<nrows; j++) { 4797 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4798 for (ll=0; ll<sbs; ll++){ 4799 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4800 for (l=0; l<ncols; l++){ 4801 *bufJ++ = cols[l]; 4802 } 4803 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4804 } 4805 } 4806 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4807 } 4808 4809 /* recvs and sends of j-array are completed */ 4810 i = nrecvs; 4811 while (i--) { 4812 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4813 } 4814 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4815 } else if (scall == MAT_REUSE_MATRIX){ 4816 sstartsj = *startsj; 4817 rstartsj = *startsj_r; 4818 bufa = *bufa_ptr; 4819 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4820 b_otha = b_oth->a; 4821 } else { 4822 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 4823 } 4824 4825 /* a-array */ 4826 /*---------*/ 4827 /* post receives of a-array */ 4828 for (i=0; i<nrecvs; i++){ 4829 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4830 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4831 } 4832 4833 /* pack the outgoing message a-array */ 4834 k = 0; 4835 for (i=0; i<nsends; i++){ 4836 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4837 bufA = bufa+sstartsj[i]; 4838 for (j=0; j<nrows; j++) { 4839 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4840 for (ll=0; ll<sbs; ll++){ 4841 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4842 for (l=0; l<ncols; l++){ 4843 *bufA++ = vals[l]; 4844 } 4845 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4846 } 4847 } 4848 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4849 } 4850 /* recvs and sends of a-array are completed */ 4851 i = nrecvs; 4852 while (i--) { 4853 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4854 } 4855 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4856 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4857 4858 if (scall == MAT_INITIAL_MATRIX){ 4859 /* put together the new matrix */ 4860 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4861 4862 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4863 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4864 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4865 b_oth->free_a = PETSC_TRUE; 4866 b_oth->free_ij = PETSC_TRUE; 4867 b_oth->nonew = 0; 4868 4869 ierr = PetscFree(bufj);CHKERRQ(ierr); 4870 if (!startsj || !bufa_ptr){ 4871 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 4872 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4873 } else { 4874 *startsj = sstartsj; 4875 *startsj_r = rstartsj; 4876 *bufa_ptr = bufa; 4877 } 4878 } 4879 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4880 PetscFunctionReturn(0); 4881 } 4882 4883 #undef __FUNCT__ 4884 #define __FUNCT__ "MatGetCommunicationStructs" 4885 /*@C 4886 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4887 4888 Not Collective 4889 4890 Input Parameters: 4891 . A - The matrix in mpiaij format 4892 4893 Output Parameter: 4894 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4895 . colmap - A map from global column index to local index into lvec 4896 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4897 4898 Level: developer 4899 4900 @*/ 4901 #if defined (PETSC_USE_CTABLE) 4902 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4903 #else 4904 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4905 #endif 4906 { 4907 Mat_MPIAIJ *a; 4908 4909 PetscFunctionBegin; 4910 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 4911 PetscValidPointer(lvec, 2) 4912 PetscValidPointer(colmap, 3) 4913 PetscValidPointer(multScatter, 4) 4914 a = (Mat_MPIAIJ *) A->data; 4915 if (lvec) *lvec = a->lvec; 4916 if (colmap) *colmap = a->colmap; 4917 if (multScatter) *multScatter = a->Mvctx; 4918 PetscFunctionReturn(0); 4919 } 4920 4921 EXTERN_C_BEGIN 4922 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*); 4923 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*); 4924 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 4925 EXTERN_C_END 4926 4927 #undef __FUNCT__ 4928 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4929 /* 4930 Computes (B'*A')' since computing B*A directly is untenable 4931 4932 n p p 4933 ( ) ( ) ( ) 4934 m ( A ) * n ( B ) = m ( C ) 4935 ( ) ( ) ( ) 4936 4937 */ 4938 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4939 { 4940 PetscErrorCode ierr; 4941 Mat At,Bt,Ct; 4942 4943 PetscFunctionBegin; 4944 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4945 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4946 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4947 ierr = MatDestroy(At);CHKERRQ(ierr); 4948 ierr = MatDestroy(Bt);CHKERRQ(ierr); 4949 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4950 ierr = MatDestroy(Ct);CHKERRQ(ierr); 4951 PetscFunctionReturn(0); 4952 } 4953 4954 #undef __FUNCT__ 4955 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4956 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4957 { 4958 PetscErrorCode ierr; 4959 PetscInt m=A->rmap->n,n=B->cmap->n; 4960 Mat Cmat; 4961 4962 PetscFunctionBegin; 4963 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 4964 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 4965 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4966 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 4967 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 4968 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4969 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4970 *C = Cmat; 4971 PetscFunctionReturn(0); 4972 } 4973 4974 /* ----------------------------------------------------------------*/ 4975 #undef __FUNCT__ 4976 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 4977 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 4978 { 4979 PetscErrorCode ierr; 4980 4981 PetscFunctionBegin; 4982 if (scall == MAT_INITIAL_MATRIX){ 4983 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 4984 } 4985 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 4986 PetscFunctionReturn(0); 4987 } 4988 4989 EXTERN_C_BEGIN 4990 #if defined(PETSC_HAVE_MUMPS) 4991 extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*); 4992 #endif 4993 #if defined(PETSC_HAVE_PASTIX) 4994 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 4995 #endif 4996 #if defined(PETSC_HAVE_SUPERLU_DIST) 4997 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 4998 #endif 4999 #if defined(PETSC_HAVE_SPOOLES) 5000 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5001 #endif 5002 EXTERN_C_END 5003 5004 /*MC 5005 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5006 5007 Options Database Keys: 5008 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5009 5010 Level: beginner 5011 5012 .seealso: MatCreateMPIAIJ() 5013 M*/ 5014 5015 EXTERN_C_BEGIN 5016 #undef __FUNCT__ 5017 #define __FUNCT__ "MatCreate_MPIAIJ" 5018 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 5019 { 5020 Mat_MPIAIJ *b; 5021 PetscErrorCode ierr; 5022 PetscMPIInt size; 5023 5024 PetscFunctionBegin; 5025 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5026 5027 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5028 B->data = (void*)b; 5029 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5030 B->rmap->bs = 1; 5031 B->assembled = PETSC_FALSE; 5032 B->mapping = 0; 5033 5034 B->insertmode = NOT_SET_VALUES; 5035 b->size = size; 5036 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5037 5038 /* build cache for off array entries formed */ 5039 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5040 b->donotstash = PETSC_FALSE; 5041 b->colmap = 0; 5042 b->garray = 0; 5043 b->roworiented = PETSC_TRUE; 5044 5045 /* stuff used for matrix vector multiply */ 5046 b->lvec = PETSC_NULL; 5047 b->Mvctx = PETSC_NULL; 5048 5049 /* stuff for MatGetRow() */ 5050 b->rowindices = 0; 5051 b->rowvalues = 0; 5052 b->getrowactive = PETSC_FALSE; 5053 5054 #if defined(PETSC_HAVE_SPOOLES) 5055 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5056 "MatGetFactor_mpiaij_spooles", 5057 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5058 #endif 5059 #if defined(PETSC_HAVE_MUMPS) 5060 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5061 "MatGetFactor_mpiaij_mumps", 5062 MatGetFactor_mpiaij_mumps);CHKERRQ(ierr); 5063 #endif 5064 #if defined(PETSC_HAVE_PASTIX) 5065 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5066 "MatGetFactor_mpiaij_pastix", 5067 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5068 #endif 5069 #if defined(PETSC_HAVE_SUPERLU_DIST) 5070 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5071 "MatGetFactor_mpiaij_superlu_dist", 5072 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5073 #endif 5074 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5075 "MatStoreValues_MPIAIJ", 5076 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5077 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5078 "MatRetrieveValues_MPIAIJ", 5079 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5080 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5081 "MatGetDiagonalBlock_MPIAIJ", 5082 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5083 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5084 "MatIsTranspose_MPIAIJ", 5085 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5086 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5087 "MatMPIAIJSetPreallocation_MPIAIJ", 5088 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5089 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5090 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5091 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5092 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5093 "MatDiagonalScaleLocal_MPIAIJ", 5094 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5095 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C", 5096 "MatConvert_MPIAIJ_MPICSRPERM", 5097 MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr); 5098 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C", 5099 "MatConvert_MPIAIJ_MPICRL", 5100 MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr); 5101 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5102 "MatConvert_MPIAIJ_MPISBAIJ", 5103 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5104 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5105 "MatMatMult_MPIDense_MPIAIJ", 5106 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5107 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5108 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5109 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5110 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5111 "MatMatMultNumeric_MPIDense_MPIAIJ", 5112 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5113 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5114 PetscFunctionReturn(0); 5115 } 5116 EXTERN_C_END 5117 5118 #undef __FUNCT__ 5119 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5120 /*@ 5121 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5122 and "off-diagonal" part of the matrix in CSR format. 5123 5124 Collective on MPI_Comm 5125 5126 Input Parameters: 5127 + comm - MPI communicator 5128 . m - number of local rows (Cannot be PETSC_DECIDE) 5129 . n - This value should be the same as the local size used in creating the 5130 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5131 calculated if N is given) For square matrices n is almost always m. 5132 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5133 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5134 . i - row indices for "diagonal" portion of matrix 5135 . j - column indices 5136 . a - matrix values 5137 . oi - row indices for "off-diagonal" portion of matrix 5138 . oj - column indices 5139 - oa - matrix values 5140 5141 Output Parameter: 5142 . mat - the matrix 5143 5144 Level: advanced 5145 5146 Notes: 5147 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 5148 5149 The i and j indices are 0 based 5150 5151 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5152 5153 This sets local rows and cannot be used to set off-processor values. 5154 5155 You cannot later use MatSetValues() to change values in this matrix. 5156 5157 .keywords: matrix, aij, compressed row, sparse, parallel 5158 5159 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5160 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5161 @*/ 5162 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5163 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5164 { 5165 PetscErrorCode ierr; 5166 Mat_MPIAIJ *maij; 5167 5168 PetscFunctionBegin; 5169 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5170 if (i[0]) { 5171 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5172 } 5173 if (oi[0]) { 5174 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5175 } 5176 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5177 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5178 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5179 maij = (Mat_MPIAIJ*) (*mat)->data; 5180 maij->donotstash = PETSC_TRUE; 5181 (*mat)->preallocated = PETSC_TRUE; 5182 5183 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5184 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5185 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5186 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5187 5188 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5189 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5190 5191 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5192 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5193 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5194 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5195 5196 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5197 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5198 PetscFunctionReturn(0); 5199 } 5200 5201 /* 5202 Special version for direct calls from Fortran 5203 */ 5204 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5205 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5206 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5207 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5208 #endif 5209 5210 /* Change these macros so can be used in void function */ 5211 #undef CHKERRQ 5212 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5213 #undef SETERRQ2 5214 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5215 #undef SETERRQ 5216 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5217 5218 EXTERN_C_BEGIN 5219 #undef __FUNCT__ 5220 #define __FUNCT__ "matsetvaluesmpiaij_" 5221 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5222 { 5223 Mat mat = *mmat; 5224 PetscInt m = *mm, n = *mn; 5225 InsertMode addv = *maddv; 5226 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5227 PetscScalar value; 5228 PetscErrorCode ierr; 5229 5230 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5231 if (mat->insertmode == NOT_SET_VALUES) { 5232 mat->insertmode = addv; 5233 } 5234 #if defined(PETSC_USE_DEBUG) 5235 else if (mat->insertmode != addv) { 5236 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5237 } 5238 #endif 5239 { 5240 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5241 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5242 PetscTruth roworiented = aij->roworiented; 5243 5244 /* Some Variables required in the macro */ 5245 Mat A = aij->A; 5246 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5247 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5248 MatScalar *aa = a->a; 5249 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5250 Mat B = aij->B; 5251 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5252 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5253 MatScalar *ba = b->a; 5254 5255 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5256 PetscInt nonew = a->nonew; 5257 MatScalar *ap1,*ap2; 5258 5259 PetscFunctionBegin; 5260 for (i=0; i<m; i++) { 5261 if (im[i] < 0) continue; 5262 #if defined(PETSC_USE_DEBUG) 5263 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5264 #endif 5265 if (im[i] >= rstart && im[i] < rend) { 5266 row = im[i] - rstart; 5267 lastcol1 = -1; 5268 rp1 = aj + ai[row]; 5269 ap1 = aa + ai[row]; 5270 rmax1 = aimax[row]; 5271 nrow1 = ailen[row]; 5272 low1 = 0; 5273 high1 = nrow1; 5274 lastcol2 = -1; 5275 rp2 = bj + bi[row]; 5276 ap2 = ba + bi[row]; 5277 rmax2 = bimax[row]; 5278 nrow2 = bilen[row]; 5279 low2 = 0; 5280 high2 = nrow2; 5281 5282 for (j=0; j<n; j++) { 5283 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5284 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5285 if (in[j] >= cstart && in[j] < cend){ 5286 col = in[j] - cstart; 5287 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5288 } else if (in[j] < 0) continue; 5289 #if defined(PETSC_USE_DEBUG) 5290 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 5291 #endif 5292 else { 5293 if (mat->was_assembled) { 5294 if (!aij->colmap) { 5295 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5296 } 5297 #if defined (PETSC_USE_CTABLE) 5298 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5299 col--; 5300 #else 5301 col = aij->colmap[in[j]] - 1; 5302 #endif 5303 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5304 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5305 col = in[j]; 5306 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5307 B = aij->B; 5308 b = (Mat_SeqAIJ*)B->data; 5309 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5310 rp2 = bj + bi[row]; 5311 ap2 = ba + bi[row]; 5312 rmax2 = bimax[row]; 5313 nrow2 = bilen[row]; 5314 low2 = 0; 5315 high2 = nrow2; 5316 bm = aij->B->rmap->n; 5317 ba = b->a; 5318 } 5319 } else col = in[j]; 5320 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5321 } 5322 } 5323 } else { 5324 if (!aij->donotstash) { 5325 if (roworiented) { 5326 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5327 } else { 5328 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5329 } 5330 } 5331 } 5332 }} 5333 PetscFunctionReturnVoid(); 5334 } 5335 EXTERN_C_END 5336 5337