#define PETSCMAT_DLL

#include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "petscblaslapack.h"

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscTruth     aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
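    /*
       What follows: process 0 ships each process its row lengths, then its column
       indices, then its numerical values.  Every process converts the row lengths it
       holds into off-diagonal counts (olens) for preallocation, and records in ld[i]
       how many entries of local row i lie to the LEFT of the diagonal block; ld is
       stashed in the Mat_MPIAIJ so that a later call with MAT_REUSE_MATRIX can copy
       fresh numerical values straight into the A (diagonal) and B (off-diagonal) parts.
    */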
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of off-diagonal entries in each row (olens) and the
         number of entries left of the diagonal block (ld) */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of off-diagonal entries in each row (olens) and the
         number of entries left of the diagonal block (ld) */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over the numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];  ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                    ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];            ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
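
/*
   Rough usage sketch for MatDistribute_MPIAIJ() above (illustrative only; gseq, m and
   A are hypothetical names, not part of this file).  The sequential matrix is only
   referenced on process 0; every process passes the number of rows it is to own.

     Mat      gseq = PETSC_NULL,A;
     PetscInt m;                       // local rows this process will own
     if (!rank) { ... assemble gseq as a square MATSEQAIJ matrix ... }
     ierr = MatDistribute_MPIAIJ(comm,gseq,m,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
     ...
     // after only the numerical values of gseq have changed on process 0:
     ierr = MatDistribute_MPIAIJ(comm,gseq,m,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
*/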

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array) but it is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}


#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i]  = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i]  = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ 
"MatSetValuesRow_MPIAIJ" 277 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 278 { 279 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 280 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 281 PetscErrorCode ierr; 282 PetscInt l,*garray = mat->garray,diag; 283 284 PetscFunctionBegin; 285 /* code only works for square matrices A */ 286 287 /* find size of row to the left of the diagonal part */ 288 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 289 row = row - diag; 290 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 291 if (garray[b->j[b->i[row]+l]] > diag) break; 292 } 293 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 294 295 /* diagonal part */ 296 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 297 298 /* right of diagonal part */ 299 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 300 PetscFunctionReturn(0); 301 } 302 303 #undef __FUNCT__ 304 #define __FUNCT__ "MatSetValues_MPIAIJ" 305 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 306 { 307 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 308 PetscScalar value; 309 PetscErrorCode ierr; 310 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 311 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 312 PetscTruth roworiented = aij->roworiented; 313 314 /* Some Variables required in the macro */ 315 Mat A = aij->A; 316 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 317 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 318 MatScalar *aa = a->a; 319 PetscTruth ignorezeroentries = a->ignorezeroentries; 320 Mat B = aij->B; 321 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 322 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 323 MatScalar *ba = b->a; 324 325 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 326 PetscInt nonew = a->nonew; 327 MatScalar *ap1,*ap2; 328 329 PetscFunctionBegin; 330 if (v) PetscValidScalarPointer(v,6); 331 for (i=0; i<m; i++) { 332 if (im[i] < 0) continue; 333 #if defined(PETSC_USE_DEBUG) 334 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 335 #endif 336 if (im[i] >= rstart && im[i] < rend) { 337 row = im[i] - rstart; 338 lastcol1 = -1; 339 rp1 = aj + ai[row]; 340 ap1 = aa + ai[row]; 341 rmax1 = aimax[row]; 342 nrow1 = ailen[row]; 343 low1 = 0; 344 high1 = nrow1; 345 lastcol2 = -1; 346 rp2 = bj + bi[row]; 347 ap2 = ba + bi[row]; 348 rmax2 = bimax[row]; 349 nrow2 = bilen[row]; 350 low2 = 0; 351 high2 = nrow2; 352 353 for (j=0; j<n; j++) { 354 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 355 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 356 if (in[j] >= cstart && in[j] < cend){ 357 col = in[j] - cstart; 358 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 359 } else if (in[j] < 0) continue; 360 #if defined(PETSC_USE_DEBUG) 361 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 362 #endif 363 else { 364 if (mat->was_assembled) { 365 if (!aij->colmap) { 366 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 367 } 368 #if defined (PETSC_USE_CTABLE) 369 ierr = 
PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 370 col--; 371 #else 372 col = aij->colmap[in[j]] - 1; 373 #endif 374 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 375 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 376 col = in[j]; 377 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 378 B = aij->B; 379 b = (Mat_SeqAIJ*)B->data; 380 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 381 rp2 = bj + bi[row]; 382 ap2 = ba + bi[row]; 383 rmax2 = bimax[row]; 384 nrow2 = bilen[row]; 385 low2 = 0; 386 high2 = nrow2; 387 bm = aij->B->rmap->n; 388 ba = b->a; 389 } 390 } else col = in[j]; 391 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 392 } 393 } 394 } else { 395 if (!aij->donotstash) { 396 if (roworiented) { 397 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 398 } else { 399 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 400 } 401 } 402 } 403 } 404 PetscFunctionReturn(0); 405 } 406 407 #undef __FUNCT__ 408 #define __FUNCT__ "MatGetValues_MPIAIJ" 409 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 410 { 411 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 412 PetscErrorCode ierr; 413 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 414 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 415 416 PetscFunctionBegin; 417 for (i=0; i<m; i++) { 418 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 419 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 420 if (idxm[i] >= rstart && idxm[i] < rend) { 421 row = idxm[i] - rstart; 422 for (j=0; j<n; j++) { 423 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 424 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 425 if (idxn[j] >= cstart && idxn[j] < cend){ 426 col = idxn[j] - cstart; 427 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 428 } else { 429 if (!aij->colmap) { 430 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 431 } 432 #if defined (PETSC_USE_CTABLE) 433 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 434 col --; 435 #else 436 col = aij->colmap[idxn[j]] - 1; 437 #endif 438 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 439 else { 440 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 441 } 442 } 443 } 444 } else { 445 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 446 } 447 } 448 PetscFunctionReturn(0); 449 } 450 451 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 452 453 #undef __FUNCT__ 454 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 455 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 456 { 457 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 458 PetscErrorCode ierr; 459 PetscInt nstash,reallocs; 460 InsertMode addv; 461 462 PetscFunctionBegin; 463 if (aij->donotstash) { 464 PetscFunctionReturn(0); 465 } 466 467 /* make sure all processors are either in INSERTMODE or ADDMODE */ 468 ierr = 
MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
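
/*
   Minimal sketch of the assembly cycle implemented by the routine above and the one
   below (illustrative only; C, nrows, rows, ncols, cols, vals are hypothetical user
   variables).  Entries destined for rows owned by another process are placed in the
   stash by MatSetValues_MPIAIJ() and are sent/received during assembly.

     for (i=0; i<nrows; i++) {
       ierr = MatSetValues(C,1,&rows[i],ncols[i],cols[i],vals[i],ADD_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);   // ship stashed entries
     ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);     // receive and insert them
*/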

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ *)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so, we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change, then we know that
     no processor has disassembled, and we can skip this step
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */

  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr); aij->diag = 0;}
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap->range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = 
PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 612 for (i=0; i<nrecvs; i++) { 613 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 614 } 615 616 /* do sends: 617 1) starts[i] gives the starting index in svalues for stuff going to 618 the ith processor 619 */ 620 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 621 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 622 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 623 starts[0] = 0; 624 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 625 for (i=0; i<N; i++) { 626 svalues[starts[owner[i]]++] = rows[i]; 627 } 628 629 starts[0] = 0; 630 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 631 count = 0; 632 for (i=0; i<size; i++) { 633 if (nprocs[2*i+1]) { 634 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 635 } 636 } 637 ierr = PetscFree(starts);CHKERRQ(ierr); 638 639 base = owners[rank]; 640 641 /* wait on receives */ 642 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 643 count = nrecvs; slen = 0; 644 while (count) { 645 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 646 /* unpack receives into our local space */ 647 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 648 source[imdex] = recv_status.MPI_SOURCE; 649 lens[imdex] = n; 650 slen += n; 651 count--; 652 } 653 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 654 655 /* move the data into the send scatter */ 656 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 657 count = 0; 658 for (i=0; i<nrecvs; i++) { 659 values = rvalues + i*nmax; 660 for (j=0; j<lens[i]; j++) { 661 lrows[count++] = values[j] - base; 662 } 663 } 664 ierr = PetscFree(rvalues);CHKERRQ(ierr); 665 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 666 ierr = PetscFree(owner);CHKERRQ(ierr); 667 ierr = PetscFree(nprocs);CHKERRQ(ierr); 668 669 /* actually zap the local rows */ 670 /* 671 Zero the required rows. If the "diagonal block" of the matrix 672 is square and the user wishes to set the diagonal we use separate 673 code so that MatSetValues() is not called for each diagonal allocating 674 new memory, thus calling lots of mallocs and slowing things down. 
675 676 */ 677 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 678 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 679 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 680 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 681 } else if (diag != 0.0) { 682 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 683 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 684 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 685 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 686 } 687 for (i = 0; i < slen; i++) { 688 row = lrows[i] + rstart; 689 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 690 } 691 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 692 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 693 } else { 694 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 695 } 696 ierr = PetscFree(lrows);CHKERRQ(ierr); 697 698 /* wait on sends */ 699 if (nsends) { 700 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 701 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 702 ierr = PetscFree(send_status);CHKERRQ(ierr); 703 } 704 ierr = PetscFree(send_waits);CHKERRQ(ierr); 705 ierr = PetscFree(svalues);CHKERRQ(ierr); 706 707 PetscFunctionReturn(0); 708 } 709 710 #undef __FUNCT__ 711 #define __FUNCT__ "MatMult_MPIAIJ" 712 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 713 { 714 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 715 PetscErrorCode ierr; 716 PetscInt nt; 717 718 PetscFunctionBegin; 719 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 720 if (nt != A->cmap->n) { 721 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 722 } 723 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 724 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 725 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 726 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 727 PetscFunctionReturn(0); 728 } 729 730 #undef __FUNCT__ 731 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 732 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 733 { 734 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 735 PetscErrorCode ierr; 736 737 PetscFunctionBegin; 738 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 739 PetscFunctionReturn(0); 740 } 741 742 #undef __FUNCT__ 743 #define __FUNCT__ "MatMultAdd_MPIAIJ" 744 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 745 { 746 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 747 PetscErrorCode ierr; 748 749 PetscFunctionBegin; 750 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 751 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 752 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 753 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 754 PetscFunctionReturn(0); 755 } 756 757 #undef __FUNCT__ 758 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 759 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 760 { 761 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 762 PetscErrorCode ierr; 763 PetscTruth merged; 764 765 PetscFunctionBegin; 766 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 767 /* do nondiagonal part */ 768 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 769 if (!merged) { 770 /* 
send it on its way */ 771 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 772 /* do local part */ 773 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 774 /* receive remote parts: note this assumes the values are not actually */ 775 /* added in yy until the next line, */ 776 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 777 } else { 778 /* do local part */ 779 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 780 /* send it on its way */ 781 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 782 /* values actually were received in the Begin() but we need to call this nop */ 783 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 784 } 785 PetscFunctionReturn(0); 786 } 787 788 EXTERN_C_BEGIN 789 #undef __FUNCT__ 790 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 791 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 792 { 793 MPI_Comm comm; 794 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 795 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 796 IS Me,Notme; 797 PetscErrorCode ierr; 798 PetscInt M,N,first,last,*notme,i; 799 PetscMPIInt size; 800 801 PetscFunctionBegin; 802 803 /* Easy test: symmetric diagonal block */ 804 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 805 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 806 if (!*f) PetscFunctionReturn(0); 807 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 808 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 809 if (size == 1) PetscFunctionReturn(0); 810 811 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 812 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 813 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 814 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 815 for (i=0; i<first; i++) notme[i] = i; 816 for (i=last; i<M; i++) notme[i-last+first] = i; 817 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 818 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 819 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 820 Aoff = Aoffs[0]; 821 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 822 Boff = Boffs[0]; 823 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 824 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 825 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 826 ierr = ISDestroy(Me);CHKERRQ(ierr); 827 ierr = ISDestroy(Notme);CHKERRQ(ierr); 828 829 PetscFunctionReturn(0); 830 } 831 EXTERN_C_END 832 833 #undef __FUNCT__ 834 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 835 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 836 { 837 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 838 PetscErrorCode ierr; 839 840 PetscFunctionBegin; 841 /* do nondiagonal part */ 842 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 843 /* send it on its way */ 844 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 845 /* do local part */ 846 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 847 /* receive remote parts */ 848 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 849 PetscFunctionReturn(0); 850 } 851 852 /* 853 This only works correctly for square matrices where the subblock A->A is the 854 diagonal block 855 */ 
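/*
   Because the row and column ownership ranges are required to coincide (checked in
   the routine below), every global diagonal entry with rstart <= i < rend lies in the
   local diagonal block a->A, so taking the diagonal of a->A fills exactly the local
   part of the parallel vector v.
*/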
856 #undef __FUNCT__ 857 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 858 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 859 { 860 PetscErrorCode ierr; 861 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 862 863 PetscFunctionBegin; 864 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 865 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 866 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 867 PetscFunctionReturn(0); 868 } 869 870 #undef __FUNCT__ 871 #define __FUNCT__ "MatScale_MPIAIJ" 872 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 873 { 874 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 875 PetscErrorCode ierr; 876 877 PetscFunctionBegin; 878 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 879 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 880 PetscFunctionReturn(0); 881 } 882 883 #undef __FUNCT__ 884 #define __FUNCT__ "MatDestroy_MPIAIJ" 885 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 886 { 887 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 888 PetscErrorCode ierr; 889 890 PetscFunctionBegin; 891 #if defined(PETSC_USE_LOG) 892 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 893 #endif 894 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 895 if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);} 896 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 897 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 898 #if defined (PETSC_USE_CTABLE) 899 if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);} 900 #else 901 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 902 #endif 903 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 904 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 905 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 906 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 907 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 908 ierr = PetscFree(aij);CHKERRQ(ierr); 909 910 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 911 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 912 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 913 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 914 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 915 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 916 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 917 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 918 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 919 PetscFunctionReturn(0); 920 } 921 922 #undef __FUNCT__ 923 #define __FUNCT__ "MatView_MPIAIJ_Binary" 924 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 925 { 926 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 927 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 928 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 929 PetscErrorCode ierr; 930 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 931 int fd; 932 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 933 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = 
mat->cmap->rstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the largest amount any process needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
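  /*
     Pack the local numerical values in global column order, mirroring the index loop
     above: for each row, first the off-diagonal (B) entries whose global column lies
     left of the diagonal block, then the diagonal-block (A) entries, then the
     remaining off-diagonal entries to the right.
  */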
1015 cnt = 0; 1016 for (i=0; i<mat->rmap->n; i++) { 1017 for (j=B->i[i]; j<B->i[i+1]; j++) { 1018 if ( garray[B->j[j]] > cstart) break; 1019 column_values[cnt++] = B->a[j]; 1020 } 1021 for (k=A->i[i]; k<A->i[i+1]; k++) { 1022 column_values[cnt++] = A->a[k]; 1023 } 1024 for (; j<B->i[i+1]; j++) { 1025 column_values[cnt++] = B->a[j]; 1026 } 1027 } 1028 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1029 1030 /* store the column values to the file */ 1031 if (!rank) { 1032 MPI_Status status; 1033 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1034 for (i=1; i<size; i++) { 1035 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1036 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1037 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1038 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1039 } 1040 } else { 1041 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1042 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1043 } 1044 ierr = PetscFree(column_values);CHKERRQ(ierr); 1045 PetscFunctionReturn(0); 1046 } 1047 1048 #undef __FUNCT__ 1049 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1050 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1051 { 1052 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1053 PetscErrorCode ierr; 1054 PetscMPIInt rank = aij->rank,size = aij->size; 1055 PetscTruth isdraw,iascii,isbinary; 1056 PetscViewer sviewer; 1057 PetscViewerFormat format; 1058 1059 PetscFunctionBegin; 1060 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1061 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1062 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1063 if (iascii) { 1064 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1065 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1066 MatInfo info; 1067 PetscTruth inodes; 1068 1069 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1070 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1071 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1072 if (!inodes) { 1073 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1074 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1075 } else { 1076 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1077 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1078 } 1079 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1080 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1081 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1082 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1083 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1084 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used 
in matrix-vector product: \n");CHKERRQ(ierr); 1085 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1086 PetscFunctionReturn(0); 1087 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1088 PetscInt inodecount,inodelimit,*inodes; 1089 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1090 if (inodes) { 1091 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1092 } else { 1093 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1094 } 1095 PetscFunctionReturn(0); 1096 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1097 PetscFunctionReturn(0); 1098 } 1099 } else if (isbinary) { 1100 if (size == 1) { 1101 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1102 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1103 } else { 1104 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1105 } 1106 PetscFunctionReturn(0); 1107 } else if (isdraw) { 1108 PetscDraw draw; 1109 PetscTruth isnull; 1110 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1111 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1112 } 1113 1114 if (size == 1) { 1115 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1116 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1117 } else { 1118 /* assemble the entire matrix onto first processor. */ 1119 Mat A; 1120 Mat_SeqAIJ *Aloc; 1121 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1122 MatScalar *a; 1123 1124 if (mat->rmap->N > 1024) { 1125 PetscTruth flg = PETSC_FALSE; 1126 1127 ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1128 if (!flg) { 1129 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1130 } 1131 } 1132 1133 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1134 if (!rank) { 1135 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1136 } else { 1137 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1138 } 1139 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1140 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1141 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1142 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1143 1144 /* copy over the A part */ 1145 Aloc = (Mat_SeqAIJ*)aij->A->data; 1146 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1147 row = mat->rmap->rstart; 1148 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1149 for (i=0; i<m; i++) { 1150 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1151 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1152 } 1153 aj = Aloc->j; 1154 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1155 1156 /* copy over the B part */ 1157 Aloc = (Mat_SeqAIJ*)aij->B->data; 1158 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1159 row = mat->rmap->rstart; 1160 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1161 ct = cols; 1162 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1163 for (i=0; i<m; i++) { 1164 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1165 row++; a += ai[i+1]-ai[i]; 
cols += ai[i+1]-ai[i]; 1166 } 1167 ierr = PetscFree(ct);CHKERRQ(ierr); 1168 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1169 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1170 /* 1171 Everyone has to call to draw the matrix since the graphics waits are 1172 synchronized across all processors that share the PetscDraw object 1173 */ 1174 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1175 if (!rank) { 1176 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1177 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1178 } 1179 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1180 ierr = MatDestroy(A);CHKERRQ(ierr); 1181 } 1182 PetscFunctionReturn(0); 1183 } 1184 1185 #undef __FUNCT__ 1186 #define __FUNCT__ "MatView_MPIAIJ" 1187 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1188 { 1189 PetscErrorCode ierr; 1190 PetscTruth iascii,isdraw,issocket,isbinary; 1191 1192 PetscFunctionBegin; 1193 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1194 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1195 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1196 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1197 if (iascii || isdraw || isbinary || issocket) { 1198 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1199 } else { 1200 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1201 } 1202 PetscFunctionReturn(0); 1203 } 1204 1205 #undef __FUNCT__ 1206 #define __FUNCT__ "MatSOR_MPIAIJ" 1207 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1208 { 1209 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1210 PetscErrorCode ierr; 1211 Vec bb1 = 0; 1212 PetscTruth hasop; 1213 1214 PetscFunctionBegin; 1215 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1216 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1217 } 1218 1219 if (flag == SOR_APPLY_UPPER) { 1220 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1221 PetscFunctionReturn(0); 1222 } 1223 1224 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1225 if (flag & SOR_ZERO_INITIAL_GUESS) { 1226 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1227 its--; 1228 } 1229 1230 while (its--) { 1231 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1232 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1233 1234 /* update rhs: bb1 = bb - B*x */ 1235 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1236 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1237 1238 /* local sweep */ 1239 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1240 } 1241 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1242 if (flag & SOR_ZERO_INITIAL_GUESS) { 1243 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1244 its--; 1245 } 1246 while (its--) { 1247 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1248 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1249 1250 /* 
update rhs: bb1 = bb - B*x */ 1251 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1252 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1253 1254 /* local sweep */ 1255 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1256 } 1257 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1258 if (flag & SOR_ZERO_INITIAL_GUESS) { 1259 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1260 its--; 1261 } 1262 while (its--) { 1263 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1264 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1265 1266 /* update rhs: bb1 = bb - B*x */ 1267 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1268 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1269 1270 /* local sweep */ 1271 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1272 } 1273 } else if (flag & SOR_EISENSTAT) { 1274 Vec xx1; 1275 1276 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1277 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1278 1279 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1280 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1281 if (!mat->diag) { 1282 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1283 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1284 } 1285 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1286 if (hasop) { 1287 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1288 } else { 1289 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1290 } 1291 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1292 1293 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1294 1295 /* local sweep */ 1296 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1297 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1298 ierr = VecDestroy(xx1);CHKERRQ(ierr); 1299 } else { 1300 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1301 } 1302 1303 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 1304 PetscFunctionReturn(0); 1305 } 1306 1307 #undef __FUNCT__ 1308 #define __FUNCT__ "MatPermute_MPIAIJ" 1309 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1310 { 1311 MPI_Comm comm,pcomm; 1312 PetscInt first,local_size,nrows; 1313 const PetscInt *rows; 1314 PetscMPIInt size; 1315 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1316 PetscErrorCode ierr; 1317 1318 PetscFunctionBegin; 1319 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1320 /* make a collective version of 'rowp' */ 1321 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1322 if (pcomm==comm) { 1323 crowp = rowp; 1324 } else { 1325 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1326 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1327 ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr); 1328 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1329 } 1330 /* collect the global row permutation and invert it */ 1331 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1332 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1333 if (pcomm!=comm) { 1334 ierr = ISDestroy(crowp);CHKERRQ(ierr); 1335 } 1336 ierr = 
ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1337 /* get the local target indices */ 1338 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1339 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 1340 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1341 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr); 1342 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1343 ierr = ISDestroy(irowp);CHKERRQ(ierr); 1344 /* the column permutation is so much easier; 1345 make a local version of 'colp' and invert it */ 1346 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 1347 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 1348 if (size==1) { 1349 lcolp = colp; 1350 } else { 1351 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 1352 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 1353 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr); 1354 } 1355 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1356 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1357 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 1358 if (size>1) { 1359 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 1360 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 1361 } 1362 /* now we just get the submatrix */ 1363 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1364 /* clean up */ 1365 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 1366 ierr = ISDestroy(icolp);CHKERRQ(ierr); 1367 PetscFunctionReturn(0); 1368 } 1369 1370 #undef __FUNCT__ 1371 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1372 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1373 { 1374 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1375 Mat A = mat->A,B = mat->B; 1376 PetscErrorCode ierr; 1377 PetscReal isend[5],irecv[5]; 1378 1379 PetscFunctionBegin; 1380 info->block_size = 1.0; 1381 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1382 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1383 isend[3] = info->memory; isend[4] = info->mallocs; 1384 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1385 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1386 isend[3] += info->memory; isend[4] += info->mallocs; 1387 if (flag == MAT_LOCAL) { 1388 info->nz_used = isend[0]; 1389 info->nz_allocated = isend[1]; 1390 info->nz_unneeded = isend[2]; 1391 info->memory = isend[3]; 1392 info->mallocs = isend[4]; 1393 } else if (flag == MAT_GLOBAL_MAX) { 1394 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1395 info->nz_used = irecv[0]; 1396 info->nz_allocated = irecv[1]; 1397 info->nz_unneeded = irecv[2]; 1398 info->memory = irecv[3]; 1399 info->mallocs = irecv[4]; 1400 } else if (flag == MAT_GLOBAL_SUM) { 1401 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1402 info->nz_used = irecv[0]; 1403 info->nz_allocated = irecv[1]; 1404 info->nz_unneeded = irecv[2]; 1405 info->memory = irecv[3]; 1406 info->mallocs = irecv[4]; 1407 } 1408 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1409 info->fill_ratio_needed = 0; 1410 info->factor_mallocs = 0; 1411 1412 PetscFunctionReturn(0); 1413 } 1414 1415 #undef __FUNCT__ 1416 #define __FUNCT__ "MatSetOption_MPIAIJ" 1417 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg) 1418 { 1419 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1420 PetscErrorCode ierr; 1421 1422 PetscFunctionBegin; 1423 
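  /*
     Most options are forwarded to both sequential blocks: a->A (the diagonal block)
     and a->B (the off-diagonal block).  The symmetry-type options (MAT_SYMMETRIC,
     MAT_STRUCTURALLY_SYMMETRIC, MAT_HERMITIAN, MAT_SYMMETRY_ETERNAL) only make sense
     for the square diagonal block and are forwarded to a->A alone, while
     MAT_IGNORE_OFF_PROC_ENTRIES simply sets the donotstash flag.
  */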
switch (op) { 1424 case MAT_NEW_NONZERO_LOCATIONS: 1425 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1426 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1427 case MAT_KEEP_NONZERO_PATTERN: 1428 case MAT_NEW_NONZERO_LOCATION_ERR: 1429 case MAT_USE_INODES: 1430 case MAT_IGNORE_ZERO_ENTRIES: 1431 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1432 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1433 break; 1434 case MAT_ROW_ORIENTED: 1435 a->roworiented = flg; 1436 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1437 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1438 break; 1439 case MAT_NEW_DIAGONALS: 1440 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1441 break; 1442 case MAT_IGNORE_OFF_PROC_ENTRIES: 1443 a->donotstash = PETSC_TRUE; 1444 break; 1445 case MAT_SYMMETRIC: 1446 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1447 break; 1448 case MAT_STRUCTURALLY_SYMMETRIC: 1449 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1450 break; 1451 case MAT_HERMITIAN: 1452 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1453 break; 1454 case MAT_SYMMETRY_ETERNAL: 1455 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1456 break; 1457 default: 1458 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1459 } 1460 PetscFunctionReturn(0); 1461 } 1462 1463 #undef __FUNCT__ 1464 #define __FUNCT__ "MatGetRow_MPIAIJ" 1465 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1466 { 1467 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1468 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1469 PetscErrorCode ierr; 1470 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1471 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1472 PetscInt *cmap,*idx_p; 1473 1474 PetscFunctionBegin; 1475 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1476 mat->getrowactive = PETSC_TRUE; 1477 1478 if (!mat->rowvalues && (idx || v)) { 1479 /* 1480 allocate enough space to hold information from the longest row. 
1481 */ 1482 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1483 PetscInt max = 1,tmp; 1484 for (i=0; i<matin->rmap->n; i++) { 1485 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1486 if (max < tmp) { max = tmp; } 1487 } 1488 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1489 } 1490 1491 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1492 lrow = row - rstart; 1493 1494 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1495 if (!v) {pvA = 0; pvB = 0;} 1496 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1497 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1498 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1499 nztot = nzA + nzB; 1500 1501 cmap = mat->garray; 1502 if (v || idx) { 1503 if (nztot) { 1504 /* Sort by increasing column numbers, assuming A and B already sorted */ 1505 PetscInt imark = -1; 1506 if (v) { 1507 *v = v_p = mat->rowvalues; 1508 for (i=0; i<nzB; i++) { 1509 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1510 else break; 1511 } 1512 imark = i; 1513 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1514 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1515 } 1516 if (idx) { 1517 *idx = idx_p = mat->rowindices; 1518 if (imark > -1) { 1519 for (i=0; i<imark; i++) { 1520 idx_p[i] = cmap[cworkB[i]]; 1521 } 1522 } else { 1523 for (i=0; i<nzB; i++) { 1524 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1525 else break; 1526 } 1527 imark = i; 1528 } 1529 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1530 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1531 } 1532 } else { 1533 if (idx) *idx = 0; 1534 if (v) *v = 0; 1535 } 1536 } 1537 *nz = nztot; 1538 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1539 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1540 PetscFunctionReturn(0); 1541 } 1542 1543 #undef __FUNCT__ 1544 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1545 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1546 { 1547 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1548 1549 PetscFunctionBegin; 1550 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1551 aij->getrowactive = PETSC_FALSE; 1552 PetscFunctionReturn(0); 1553 } 1554 1555 #undef __FUNCT__ 1556 #define __FUNCT__ "MatNorm_MPIAIJ" 1557 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1558 { 1559 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1560 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1561 PetscErrorCode ierr; 1562 PetscInt i,j,cstart = mat->cmap->rstart; 1563 PetscReal sum = 0.0; 1564 MatScalar *v; 1565 1566 PetscFunctionBegin; 1567 if (aij->size == 1) { 1568 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1569 } else { 1570 if (type == NORM_FROBENIUS) { 1571 v = amat->a; 1572 for (i=0; i<amat->nz; i++) { 1573 #if defined(PETSC_USE_COMPLEX) 1574 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1575 #else 1576 sum += (*v)*(*v); v++; 1577 #endif 1578 } 1579 v = bmat->a; 1580 for (i=0; i<bmat->nz; i++) { 1581 #if defined(PETSC_USE_COMPLEX) 1582 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1583 #else 1584 sum += (*v)*(*v); v++; 1585 #endif 1586 } 1587 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1588 *norm = sqrt(*norm); 1589 } else if (type 
== NORM_1) { /* max column norm */ 1590 PetscReal *tmp,*tmp2; 1591 PetscInt *jj,*garray = aij->garray; 1592 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1593 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1594 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1595 *norm = 0.0; 1596 v = amat->a; jj = amat->j; 1597 for (j=0; j<amat->nz; j++) { 1598 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1599 } 1600 v = bmat->a; jj = bmat->j; 1601 for (j=0; j<bmat->nz; j++) { 1602 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1603 } 1604 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1605 for (j=0; j<mat->cmap->N; j++) { 1606 if (tmp2[j] > *norm) *norm = tmp2[j]; 1607 } 1608 ierr = PetscFree(tmp);CHKERRQ(ierr); 1609 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1610 } else if (type == NORM_INFINITY) { /* max row norm */ 1611 PetscReal ntemp = 0.0; 1612 for (j=0; j<aij->A->rmap->n; j++) { 1613 v = amat->a + amat->i[j]; 1614 sum = 0.0; 1615 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1616 sum += PetscAbsScalar(*v); v++; 1617 } 1618 v = bmat->a + bmat->i[j]; 1619 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1620 sum += PetscAbsScalar(*v); v++; 1621 } 1622 if (sum > ntemp) ntemp = sum; 1623 } 1624 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1625 } else { 1626 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 1627 } 1628 } 1629 PetscFunctionReturn(0); 1630 } 1631 1632 #undef __FUNCT__ 1633 #define __FUNCT__ "MatTranspose_MPIAIJ" 1634 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1635 { 1636 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1637 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1638 PetscErrorCode ierr; 1639 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1640 PetscInt cstart=A->cmap->rstart,ncol; 1641 Mat B; 1642 MatScalar *array; 1643 1644 PetscFunctionBegin; 1645 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1646 1647 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1648 ai = Aloc->i; aj = Aloc->j; 1649 bi = Bloc->i; bj = Bloc->j; 1650 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1651 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1652 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1653 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1654 for (i=0; i<ai[ma]; i++){ 1655 d_nnz[aj[i]] ++; 1656 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1657 } 1658 1659 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1660 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1661 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1662 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1663 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1664 } else { 1665 B = *matout; 1666 } 1667 1668 /* copy over the A part */ 1669 array = Aloc->a; 1670 row = A->rmap->rstart; 1671 for (i=0; i<ma; i++) { 1672 ncol = ai[i+1]-ai[i]; 1673 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1674 row++; array += ncol; aj += ncol; 1675 } 1676 aj = Aloc->j; 1677 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1678 1679 /* copy over the B part */ 1680 ierr = 
PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1681 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1682 array = Bloc->a; 1683 row = A->rmap->rstart; 1684 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1685 cols_tmp = cols; 1686 for (i=0; i<mb; i++) { 1687 ncol = bi[i+1]-bi[i]; 1688 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1689 row++; array += ncol; cols_tmp += ncol; 1690 } 1691 ierr = PetscFree(cols);CHKERRQ(ierr); 1692 1693 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1694 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1695 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1696 *matout = B; 1697 } else { 1698 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1699 } 1700 PetscFunctionReturn(0); 1701 } 1702 1703 #undef __FUNCT__ 1704 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1705 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1706 { 1707 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1708 Mat a = aij->A,b = aij->B; 1709 PetscErrorCode ierr; 1710 PetscInt s1,s2,s3; 1711 1712 PetscFunctionBegin; 1713 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1714 if (rr) { 1715 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1716 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1717 /* Overlap communication with computation. */ 1718 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1719 } 1720 if (ll) { 1721 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1722 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1723 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1724 } 1725 /* scale the diagonal block */ 1726 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1727 1728 if (rr) { 1729 /* Do a scatter end and then right scale the off-diagonal block */ 1730 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1731 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1732 } 1733 1734 PetscFunctionReturn(0); 1735 } 1736 1737 #undef __FUNCT__ 1738 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1739 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1740 { 1741 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1742 PetscErrorCode ierr; 1743 1744 PetscFunctionBegin; 1745 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1746 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1747 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 1748 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 1749 PetscFunctionReturn(0); 1750 } 1751 #undef __FUNCT__ 1752 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1753 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1754 { 1755 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1756 PetscErrorCode ierr; 1757 1758 PetscFunctionBegin; 1759 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1760 PetscFunctionReturn(0); 1761 } 1762 1763 #undef __FUNCT__ 1764 #define __FUNCT__ "MatEqual_MPIAIJ" 1765 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1766 { 1767 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1768 Mat a,b,c,d; 1769 PetscTruth flg; 1770 PetscErrorCode ierr; 1771 1772 PetscFunctionBegin; 1773 a = matA->A; b = matA->B; 1774 c = matB->A; d = matB->B; 1775 1776 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1777 if (flg) { 1778 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1779 } 1780 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1781 
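  /* combine the per-process comparisons of the diagonal and off-diagonal blocks
     with a logical AND so that every rank returns the same global answer */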
PetscFunctionReturn(0); 1782 } 1783 1784 #undef __FUNCT__ 1785 #define __FUNCT__ "MatCopy_MPIAIJ" 1786 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1787 { 1788 PetscErrorCode ierr; 1789 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1790 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1791 1792 PetscFunctionBegin; 1793 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1794 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1795 /* because of the column compression in the off-processor part of the matrix a->B, 1796 the number of columns in a->B and b->B may be different, hence we cannot call 1797 the MatCopy() directly on the two parts. If need be, we can provide a more 1798 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1799 then copying the submatrices */ 1800 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1801 } else { 1802 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1803 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1804 } 1805 PetscFunctionReturn(0); 1806 } 1807 1808 #undef __FUNCT__ 1809 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1810 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1811 { 1812 PetscErrorCode ierr; 1813 1814 PetscFunctionBegin; 1815 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1816 PetscFunctionReturn(0); 1817 } 1818 1819 #undef __FUNCT__ 1820 #define __FUNCT__ "MatAXPY_MPIAIJ" 1821 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1822 { 1823 PetscErrorCode ierr; 1824 PetscInt i; 1825 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1826 PetscBLASInt bnz,one=1; 1827 Mat_SeqAIJ *x,*y; 1828 1829 PetscFunctionBegin; 1830 if (str == SAME_NONZERO_PATTERN) { 1831 PetscScalar alpha = a; 1832 x = (Mat_SeqAIJ *)xx->A->data; 1833 y = (Mat_SeqAIJ *)yy->A->data; 1834 bnz = PetscBLASIntCast(x->nz); 1835 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1836 x = (Mat_SeqAIJ *)xx->B->data; 1837 y = (Mat_SeqAIJ *)yy->B->data; 1838 bnz = PetscBLASIntCast(x->nz); 1839 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1840 } else if (str == SUBSET_NONZERO_PATTERN) { 1841 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1842 1843 x = (Mat_SeqAIJ *)xx->B->data; 1844 y = (Mat_SeqAIJ *)yy->B->data; 1845 if (y->xtoy && y->XtoY != xx->B) { 1846 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1847 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1848 } 1849 if (!y->xtoy) { /* get xtoy */ 1850 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1851 y->XtoY = xx->B; 1852 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1853 } 1854 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1855 } else { 1856 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1857 } 1858 PetscFunctionReturn(0); 1859 } 1860 1861 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1862 1863 #undef __FUNCT__ 1864 #define __FUNCT__ "MatConjugate_MPIAIJ" 1865 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1866 { 1867 #if defined(PETSC_USE_COMPLEX) 1868 PetscErrorCode ierr; 1869 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1870 1871 PetscFunctionBegin; 1872 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1873 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1874 #else 1875 PetscFunctionBegin; 1876 #endif 1877 PetscFunctionReturn(0); 1878 } 1879 1880 #undef __FUNCT__ 1881 #define __FUNCT__ "MatRealPart_MPIAIJ" 1882 PetscErrorCode 
MatRealPart_MPIAIJ(Mat A) 1883 { 1884 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1885 PetscErrorCode ierr; 1886 1887 PetscFunctionBegin; 1888 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1889 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1890 PetscFunctionReturn(0); 1891 } 1892 1893 #undef __FUNCT__ 1894 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1895 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1896 { 1897 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1898 PetscErrorCode ierr; 1899 1900 PetscFunctionBegin; 1901 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1902 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1903 PetscFunctionReturn(0); 1904 } 1905 1906 #ifdef PETSC_HAVE_PBGL 1907 1908 #include <boost/parallel/mpi/bsp_process_group.hpp> 1909 #include <boost/graph/distributed/ilu_default_graph.hpp> 1910 #include <boost/graph/distributed/ilu_0_block.hpp> 1911 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1912 #include <boost/graph/distributed/petsc/interface.hpp> 1913 #include <boost/multi_array.hpp> 1914 #include <boost/parallel/distributed_property_map->hpp> 1915 1916 #undef __FUNCT__ 1917 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1918 /* 1919 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1920 */ 1921 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1922 { 1923 namespace petsc = boost::distributed::petsc; 1924 1925 namespace graph_dist = boost::graph::distributed; 1926 using boost::graph::distributed::ilu_default::process_group_type; 1927 using boost::graph::ilu_permuted; 1928 1929 PetscTruth row_identity, col_identity; 1930 PetscContainer c; 1931 PetscInt m, n, M, N; 1932 PetscErrorCode ierr; 1933 1934 PetscFunctionBegin; 1935 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1936 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1937 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1938 if (!row_identity || !col_identity) { 1939 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1940 } 1941 1942 process_group_type pg; 1943 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1944 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1945 lgraph_type& level_graph = *lgraph_p; 1946 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1947 1948 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1949 ilu_permuted(level_graph); 1950 1951 /* put together the new matrix */ 1952 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 1953 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1954 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1955 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 1956 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 1957 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1958 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1959 1960 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 1961 ierr = PetscContainerSetPointer(c, lgraph_p); 1962 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 1963 PetscFunctionReturn(0); 1964 } 1965 1966 #undef __FUNCT__ 1967 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1968 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 1969 { 1970 PetscFunctionBegin; 1971 PetscFunctionReturn(0); 1972 } 1973 1974 #undef __FUNCT__ 1975 
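/*
   Note: with PBGL the actual ILU(0) factorization is carried out inside
   MatILUFactorSymbolic_MPIAIJ() through ilu_permuted(), which is why the numeric
   routine above is an empty stub; MatSolve_MPIAIJ() below retrieves the stored
   level graph from the "graph" container and applies the solve via ilu_set_solve().
*/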
#define __FUNCT__ "MatSolve_MPIAIJ" 1976 /* 1977 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1978 */ 1979 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1980 { 1981 namespace graph_dist = boost::graph::distributed; 1982 1983 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1984 lgraph_type* lgraph_p; 1985 PetscContainer c; 1986 PetscErrorCode ierr; 1987 1988 PetscFunctionBegin; 1989 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1990 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1991 ierr = VecCopy(b, x);CHKERRQ(ierr); 1992 1993 PetscScalar* array_x; 1994 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1995 PetscInt sx; 1996 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 1997 1998 PetscScalar* array_b; 1999 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2000 PetscInt sb; 2001 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2002 2003 lgraph_type& level_graph = *lgraph_p; 2004 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2005 2006 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2007 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2008 ref_x(array_x, boost::extents[num_vertices(graph)]); 2009 2010 typedef boost::iterator_property_map<array_ref_type::iterator, 2011 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2012 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2013 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2014 2015 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2016 2017 PetscFunctionReturn(0); 2018 } 2019 #endif 2020 2021 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2022 PetscInt nzlocal,nsends,nrecvs; 2023 PetscMPIInt *send_rank,*recv_rank; 2024 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2025 PetscScalar *sbuf_a,**rbuf_a; 2026 PetscErrorCode (*MatDestroy)(Mat); 2027 } Mat_Redundant; 2028 2029 #undef __FUNCT__ 2030 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2031 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2032 { 2033 PetscErrorCode ierr; 2034 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2035 PetscInt i; 2036 2037 PetscFunctionBegin; 2038 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2039 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2040 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2041 for (i=0; i<redund->nrecvs; i++){ 2042 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2043 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2044 } 2045 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2046 ierr = PetscFree(redund);CHKERRQ(ierr); 2047 PetscFunctionReturn(0); 2048 } 2049 2050 #undef __FUNCT__ 2051 #define __FUNCT__ "MatDestroy_MatRedundant" 2052 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2053 { 2054 PetscErrorCode ierr; 2055 PetscContainer container; 2056 Mat_Redundant *redund=PETSC_NULL; 2057 2058 PetscFunctionBegin; 2059 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2060 if (container) { 2061 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2062 } else { 2063 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2064 } 2065 A->ops->destroy = redund->MatDestroy; 2066 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2067 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2068 ierr = 
PetscContainerDestroy(container);CHKERRQ(ierr); 2069 PetscFunctionReturn(0); 2070 } 2071 2072 #undef __FUNCT__ 2073 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2074 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2075 { 2076 PetscMPIInt rank,size; 2077 MPI_Comm comm=((PetscObject)mat)->comm; 2078 PetscErrorCode ierr; 2079 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2080 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2081 PetscInt *rowrange=mat->rmap->range; 2082 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2083 Mat A=aij->A,B=aij->B,C=*matredundant; 2084 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2085 PetscScalar *sbuf_a; 2086 PetscInt nzlocal=a->nz+b->nz; 2087 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2088 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2089 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2090 MatScalar *aworkA,*aworkB; 2091 PetscScalar *vals; 2092 PetscMPIInt tag1,tag2,tag3,imdex; 2093 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2094 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2095 MPI_Status recv_status,*send_status; 2096 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2097 PetscInt **rbuf_j=PETSC_NULL; 2098 PetscScalar **rbuf_a=PETSC_NULL; 2099 Mat_Redundant *redund=PETSC_NULL; 2100 PetscContainer container; 2101 2102 PetscFunctionBegin; 2103 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2104 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2105 2106 if (reuse == MAT_REUSE_MATRIX) { 2107 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2108 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2109 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2110 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2111 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2112 if (container) { 2113 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2114 } else { 2115 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2116 } 2117 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2118 2119 nsends = redund->nsends; 2120 nrecvs = redund->nrecvs; 2121 send_rank = redund->send_rank; 2122 recv_rank = redund->recv_rank; 2123 sbuf_nz = redund->sbuf_nz; 2124 rbuf_nz = redund->rbuf_nz; 2125 sbuf_j = redund->sbuf_j; 2126 sbuf_a = redund->sbuf_a; 2127 rbuf_j = redund->rbuf_j; 2128 rbuf_a = redund->rbuf_a; 2129 } 2130 2131 if (reuse == MAT_INITIAL_MATRIX){ 2132 PetscMPIInt subrank,subsize; 2133 PetscInt nleftover,np_subcomm; 2134 /* get the destination processors' id send_rank, nsends and nrecvs */ 2135 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2136 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2137 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2138 np_subcomm = size/nsubcomm; 2139 nleftover = size - nsubcomm*np_subcomm; 2140 nsends = 0; nrecvs = 0; 2141 for (i=0; i<size; i++){ /* i=rank*/ 2142 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2143 send_rank[nsends] = i; nsends++; 2144 recv_rank[nrecvs++] = i; 2145 } 2146 } 2147 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2148 i = size-nleftover-1; 2149 j = 0; 2150 while (j < nsubcomm - nleftover){ 2151 send_rank[nsends++] = i; 2152 i--; j++; 2153 } 2154 } 2155 2156 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2157 for (i=0; i<nleftover; i++){ 2158 recv_rank[nrecvs++] = size-nleftover+i; 2159 } 2160 } 2161 2162 /* allocate sbuf_j, sbuf_a */ 2163 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2164 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2165 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2166 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2167 2168 /* copy mat's local entries into the buffers */ 2169 if (reuse == MAT_INITIAL_MATRIX){ 2170 rownz_max = 0; 2171 rptr = sbuf_j; 2172 cols = sbuf_j + rend-rstart + 1; 2173 vals = sbuf_a; 2174 rptr[0] = 0; 2175 for (i=0; i<rend-rstart; i++){ 2176 row = i + rstart; 2177 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2178 ncols = nzA + nzB; 2179 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2180 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2181 /* load the column indices for this row into cols */ 2182 lwrite = 0; 2183 for (l=0; l<nzB; l++) { 2184 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2185 vals[lwrite] = aworkB[l]; 2186 cols[lwrite++] = ctmp; 2187 } 2188 } 2189 for (l=0; l<nzA; l++){ 2190 vals[lwrite] = aworkA[l]; 2191 cols[lwrite++] = cstart + cworkA[l]; 2192 } 2193 for (l=0; l<nzB; l++) { 2194 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2195 vals[lwrite] = aworkB[l]; 2196 cols[lwrite++] = ctmp; 2197 } 2198 } 2199 vals += ncols; 2200 cols += ncols; 2201 rptr[i+1] = rptr[i] + ncols; 2202 if (rownz_max < ncols) rownz_max = ncols; 2203 } 2204 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2205 } else { /* only copy matrix values into sbuf_a */ 2206 rptr = sbuf_j; 2207 vals = sbuf_a; 2208 rptr[0] = 0; 2209 for (i=0; i<rend-rstart; i++){ 2210 row = i + rstart; 2211 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2212 ncols = nzA + nzB; 2213 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2214 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2215 lwrite = 0; 2216 for (l=0; l<nzB; l++) { 2217 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2218 } 2219 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2220 for 
(l=0; l<nzB; l++) { 2221 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2222 } 2223 vals += ncols; 2224 rptr[i+1] = rptr[i] + ncols; 2225 } 2226 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2227 2228 /* send nzlocal to others, and recv other's nzlocal */ 2229 /*--------------------------------------------------*/ 2230 if (reuse == MAT_INITIAL_MATRIX){ 2231 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2232 s_waits2 = s_waits3 + nsends; 2233 s_waits1 = s_waits2 + nsends; 2234 r_waits1 = s_waits1 + nsends; 2235 r_waits2 = r_waits1 + nrecvs; 2236 r_waits3 = r_waits2 + nrecvs; 2237 } else { 2238 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2239 r_waits3 = s_waits3 + nsends; 2240 } 2241 2242 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2243 if (reuse == MAT_INITIAL_MATRIX){ 2244 /* get new tags to keep the communication clean */ 2245 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2246 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2247 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2248 2249 /* post receives of other's nzlocal */ 2250 for (i=0; i<nrecvs; i++){ 2251 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2252 } 2253 /* send nzlocal to others */ 2254 for (i=0; i<nsends; i++){ 2255 sbuf_nz[i] = nzlocal; 2256 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2257 } 2258 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2259 count = nrecvs; 2260 while (count) { 2261 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2262 recv_rank[imdex] = recv_status.MPI_SOURCE; 2263 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2264 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2265 2266 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2267 rbuf_nz[imdex] += i + 2; 2268 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2269 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2270 count--; 2271 } 2272 /* wait on sends of nzlocal */ 2273 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2274 /* send mat->i,j to others, and recv from other's */ 2275 /*------------------------------------------------*/ 2276 for (i=0; i<nsends; i++){ 2277 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2278 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2279 } 2280 /* wait on receives of mat->i,j */ 2281 /*------------------------------*/ 2282 count = nrecvs; 2283 while (count) { 2284 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2285 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2286 count--; 2287 } 2288 /* wait on sends of mat->i,j */ 2289 /*---------------------------*/ 2290 if (nsends) { 2291 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2292 } 2293 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2294 2295 /* post receives, send and receive mat->a */ 2296 
/*----------------------------------------*/ 2297 for (imdex=0; imdex<nrecvs; imdex++) { 2298 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2299 } 2300 for (i=0; i<nsends; i++){ 2301 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2302 } 2303 count = nrecvs; 2304 while (count) { 2305 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2306 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2307 count--; 2308 } 2309 if (nsends) { 2310 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2311 } 2312 2313 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2314 2315 /* create redundant matrix */ 2316 /*-------------------------*/ 2317 if (reuse == MAT_INITIAL_MATRIX){ 2318 /* compute rownz_max for preallocation */ 2319 for (imdex=0; imdex<nrecvs; imdex++){ 2320 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2321 rptr = rbuf_j[imdex]; 2322 for (i=0; i<j; i++){ 2323 ncols = rptr[i+1] - rptr[i]; 2324 if (rownz_max < ncols) rownz_max = ncols; 2325 } 2326 } 2327 2328 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2329 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2330 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2331 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2332 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2333 } else { 2334 C = *matredundant; 2335 } 2336 2337 /* insert local matrix entries */ 2338 rptr = sbuf_j; 2339 cols = sbuf_j + rend-rstart + 1; 2340 vals = sbuf_a; 2341 for (i=0; i<rend-rstart; i++){ 2342 row = i + rstart; 2343 ncols = rptr[i+1] - rptr[i]; 2344 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2345 vals += ncols; 2346 cols += ncols; 2347 } 2348 /* insert received matrix entries */ 2349 for (imdex=0; imdex<nrecvs; imdex++){ 2350 rstart = rowrange[recv_rank[imdex]]; 2351 rend = rowrange[recv_rank[imdex]+1]; 2352 rptr = rbuf_j[imdex]; 2353 cols = rbuf_j[imdex] + rend-rstart + 1; 2354 vals = rbuf_a[imdex]; 2355 for (i=0; i<rend-rstart; i++){ 2356 row = i + rstart; 2357 ncols = rptr[i+1] - rptr[i]; 2358 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2359 vals += ncols; 2360 cols += ncols; 2361 } 2362 } 2363 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2364 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2365 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2366 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2367 if (reuse == MAT_INITIAL_MATRIX){ 2368 PetscContainer container; 2369 *matredundant = C; 2370 /* create a supporting struct and attach it to C for reuse */ 2371 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2372 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2373 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2374 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2375 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2376 2377 redund->nzlocal = nzlocal; 2378 redund->nsends = nsends; 2379 redund->nrecvs = nrecvs; 2380 redund->send_rank = send_rank; 2381 redund->recv_rank = recv_rank; 
2382 redund->sbuf_nz = sbuf_nz; 2383 redund->rbuf_nz = rbuf_nz; 2384 redund->sbuf_j = sbuf_j; 2385 redund->sbuf_a = sbuf_a; 2386 redund->rbuf_j = rbuf_j; 2387 redund->rbuf_a = rbuf_a; 2388 2389 redund->MatDestroy = C->ops->destroy; 2390 C->ops->destroy = MatDestroy_MatRedundant; 2391 } 2392 PetscFunctionReturn(0); 2393 } 2394 2395 #undef __FUNCT__ 2396 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2397 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2398 { 2399 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2400 PetscErrorCode ierr; 2401 PetscInt i,*idxb = 0; 2402 PetscScalar *va,*vb; 2403 Vec vtmp; 2404 2405 PetscFunctionBegin; 2406 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2407 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2408 if (idx) { 2409 for (i=0; i<A->rmap->n; i++) { 2410 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2411 } 2412 } 2413 2414 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2415 if (idx) { 2416 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2417 } 2418 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2419 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2420 2421 for (i=0; i<A->rmap->n; i++){ 2422 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2423 va[i] = vb[i]; 2424 if (idx) idx[i] = a->garray[idxb[i]]; 2425 } 2426 } 2427 2428 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2429 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2430 if (idxb) { 2431 ierr = PetscFree(idxb);CHKERRQ(ierr); 2432 } 2433 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2434 PetscFunctionReturn(0); 2435 } 2436 2437 #undef __FUNCT__ 2438 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2439 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2440 { 2441 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2442 PetscErrorCode ierr; 2443 PetscInt i,*idxb = 0; 2444 PetscScalar *va,*vb; 2445 Vec vtmp; 2446 2447 PetscFunctionBegin; 2448 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2449 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2450 if (idx) { 2451 for (i=0; i<A->cmap->n; i++) { 2452 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2453 } 2454 } 2455 2456 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2457 if (idx) { 2458 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2459 } 2460 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2461 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2462 2463 for (i=0; i<A->rmap->n; i++){ 2464 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2465 va[i] = vb[i]; 2466 if (idx) idx[i] = a->garray[idxb[i]]; 2467 } 2468 } 2469 2470 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2471 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2472 if (idxb) { 2473 ierr = PetscFree(idxb);CHKERRQ(ierr); 2474 } 2475 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2476 PetscFunctionReturn(0); 2477 } 2478 2479 #undef __FUNCT__ 2480 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2481 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2482 { 2483 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2484 PetscInt n = A->rmap->n; 2485 PetscInt cstart = A->cmap->rstart; 2486 PetscInt *cmap = mat->garray; 2487 PetscInt *diagIdx, *offdiagIdx; 2488 Vec diagV, offdiagV; 2489 PetscScalar *a, *diagA, *offdiagA; 2490 PetscInt r; 2491 PetscErrorCode ierr; 2492 2493 PetscFunctionBegin; 2494 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2495 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2496 ierr = VecCreateSeq(((PetscObject)A)->comm, n, 
&offdiagV);CHKERRQ(ierr); 2497 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2498 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2499 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2500 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2501 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2502 for(r = 0; r < n; ++r) { 2503 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2504 a[r] = diagA[r]; 2505 idx[r] = cstart + diagIdx[r]; 2506 } else { 2507 a[r] = offdiagA[r]; 2508 idx[r] = cmap[offdiagIdx[r]]; 2509 } 2510 } 2511 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2512 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2513 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2514 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2515 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2516 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2517 PetscFunctionReturn(0); 2518 } 2519 2520 #undef __FUNCT__ 2521 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2522 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2523 { 2524 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2525 PetscInt n = A->rmap->n; 2526 PetscInt cstart = A->cmap->rstart; 2527 PetscInt *cmap = mat->garray; 2528 PetscInt *diagIdx, *offdiagIdx; 2529 Vec diagV, offdiagV; 2530 PetscScalar *a, *diagA, *offdiagA; 2531 PetscInt r; 2532 PetscErrorCode ierr; 2533 2534 PetscFunctionBegin; 2535 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2536 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2537 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2538 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2539 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2540 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2541 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2542 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2543 for(r = 0; r < n; ++r) { 2544 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2545 a[r] = diagA[r]; 2546 idx[r] = cstart + diagIdx[r]; 2547 } else { 2548 a[r] = offdiagA[r]; 2549 idx[r] = cmap[offdiagIdx[r]]; 2550 } 2551 } 2552 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2553 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2554 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2555 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2556 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2557 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2558 PetscFunctionReturn(0); 2559 } 2560 2561 #undef __FUNCT__ 2562 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2563 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat) 2564 { 2565 PetscErrorCode ierr; 2566 Mat *dummy; 2567 2568 PetscFunctionBegin; 2569 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2570 *newmat = *dummy; 2571 ierr = PetscFree(dummy);CHKERRQ(ierr); 2572 PetscFunctionReturn(0); 2573 } 2574 2575 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2576 /* -------------------------------------------------------------------*/ 2577 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2578 MatGetRow_MPIAIJ, 2579 MatRestoreRow_MPIAIJ, 2580 MatMult_MPIAIJ, 2581 /* 4*/ MatMultAdd_MPIAIJ, 2582 MatMultTranspose_MPIAIJ, 2583 MatMultTransposeAdd_MPIAIJ, 2584 #ifdef PETSC_HAVE_PBGL 2585 MatSolve_MPIAIJ, 2586 #else 2587 0, 2588 #endif 2589 0, 2590 0, 2591 /*10*/ 0, 2592 0, 2593 0, 2594 
MatSOR_MPIAIJ, 2595 MatTranspose_MPIAIJ, 2596 /*15*/ MatGetInfo_MPIAIJ, 2597 MatEqual_MPIAIJ, 2598 MatGetDiagonal_MPIAIJ, 2599 MatDiagonalScale_MPIAIJ, 2600 MatNorm_MPIAIJ, 2601 /*20*/ MatAssemblyBegin_MPIAIJ, 2602 MatAssemblyEnd_MPIAIJ, 2603 MatSetOption_MPIAIJ, 2604 MatZeroEntries_MPIAIJ, 2605 /*24*/ MatZeroRows_MPIAIJ, 2606 0, 2607 #ifdef PETSC_HAVE_PBGL 2608 0, 2609 #else 2610 0, 2611 #endif 2612 0, 2613 0, 2614 /*29*/ MatSetUpPreallocation_MPIAIJ, 2615 #ifdef PETSC_HAVE_PBGL 2616 0, 2617 #else 2618 0, 2619 #endif 2620 0, 2621 0, 2622 0, 2623 /*34*/ MatDuplicate_MPIAIJ, 2624 0, 2625 0, 2626 0, 2627 0, 2628 /*39*/ MatAXPY_MPIAIJ, 2629 MatGetSubMatrices_MPIAIJ, 2630 MatIncreaseOverlap_MPIAIJ, 2631 MatGetValues_MPIAIJ, 2632 MatCopy_MPIAIJ, 2633 /*44*/ MatGetRowMax_MPIAIJ, 2634 MatScale_MPIAIJ, 2635 0, 2636 0, 2637 0, 2638 /*49*/ MatSetBlockSize_MPIAIJ, 2639 0, 2640 0, 2641 0, 2642 0, 2643 /*54*/ MatFDColoringCreate_MPIAIJ, 2644 0, 2645 MatSetUnfactored_MPIAIJ, 2646 MatPermute_MPIAIJ, 2647 0, 2648 /*59*/ MatGetSubMatrix_MPIAIJ, 2649 MatDestroy_MPIAIJ, 2650 MatView_MPIAIJ, 2651 0, 2652 0, 2653 /*64*/ 0, 2654 0, 2655 0, 2656 0, 2657 0, 2658 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2659 MatGetRowMinAbs_MPIAIJ, 2660 0, 2661 MatSetColoring_MPIAIJ, 2662 #if defined(PETSC_HAVE_ADIC) 2663 MatSetValuesAdic_MPIAIJ, 2664 #else 2665 0, 2666 #endif 2667 MatSetValuesAdifor_MPIAIJ, 2668 /*75*/ MatFDColoringApply_AIJ, 2669 0, 2670 0, 2671 0, 2672 0, 2673 /*80*/ 0, 2674 0, 2675 0, 2676 /*83*/ MatLoad_MPIAIJ, 2677 0, 2678 0, 2679 0, 2680 0, 2681 0, 2682 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2683 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2684 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2685 MatPtAP_Basic, 2686 MatPtAPSymbolic_MPIAIJ, 2687 /*94*/ MatPtAPNumeric_MPIAIJ, 2688 0, 2689 0, 2690 0, 2691 0, 2692 /*99*/ 0, 2693 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2694 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2695 MatConjugate_MPIAIJ, 2696 0, 2697 /*104*/MatSetValuesRow_MPIAIJ, 2698 MatRealPart_MPIAIJ, 2699 MatImaginaryPart_MPIAIJ, 2700 0, 2701 0, 2702 /*109*/0, 2703 MatGetRedundantMatrix_MPIAIJ, 2704 MatGetRowMin_MPIAIJ, 2705 0, 2706 0, 2707 /*114*/MatGetSeqNonzerostructure_MPIAIJ, 2708 0, 2709 0, 2710 0, 2711 0, 2712 0 2713 }; 2714 2715 /* ----------------------------------------------------------------------------------------*/ 2716 2717 EXTERN_C_BEGIN 2718 #undef __FUNCT__ 2719 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2720 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2721 { 2722 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2723 PetscErrorCode ierr; 2724 2725 PetscFunctionBegin; 2726 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2727 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2728 PetscFunctionReturn(0); 2729 } 2730 EXTERN_C_END 2731 2732 EXTERN_C_BEGIN 2733 #undef __FUNCT__ 2734 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2735 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2736 { 2737 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2738 PetscErrorCode ierr; 2739 2740 PetscFunctionBegin; 2741 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2742 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2743 PetscFunctionReturn(0); 2744 } 2745 EXTERN_C_END 2746 2747 #include "petscpc.h" 2748 EXTERN_C_BEGIN 2749 #undef __FUNCT__ 2750 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2751 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2752 { 2753 Mat_MPIAIJ *b; 2754 PetscErrorCode ierr; 2755 PetscInt i; 2756 2757 PetscFunctionBegin; 2758 
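  /*
     d_nz/d_nnz preallocate the diagonal block (columns owned by this process) and
     o_nz/o_nnz the off-diagonal block; when the caller gives no hint the defaults
     of 5 and 2 nonzeros per row are applied below.  A minimal caller-side sketch
     (illustrative only; the local size n is a hypothetical value):

         Mat A;
         ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
         ierr = MatSetSizes(A,n,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
         ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
         ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
  */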
if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2759 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2760 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2761 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2762 2763 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 2764 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 2765 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2766 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2767 if (d_nnz) { 2768 for (i=0; i<B->rmap->n; i++) { 2769 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2770 } 2771 } 2772 if (o_nnz) { 2773 for (i=0; i<B->rmap->n; i++) { 2774 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2775 } 2776 } 2777 b = (Mat_MPIAIJ*)B->data; 2778 2779 if (!B->preallocated) { 2780 /* Explicitly create 2 MATSEQAIJ matrices. */ 2781 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2782 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2783 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2784 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2785 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2786 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2787 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2788 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2789 } 2790 2791 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2792 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2793 B->preallocated = PETSC_TRUE; 2794 PetscFunctionReturn(0); 2795 } 2796 EXTERN_C_END 2797 2798 #undef __FUNCT__ 2799 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2800 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2801 { 2802 Mat mat; 2803 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2804 PetscErrorCode ierr; 2805 2806 PetscFunctionBegin; 2807 *newmat = 0; 2808 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2809 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2810 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2811 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2812 a = (Mat_MPIAIJ*)mat->data; 2813 2814 mat->factortype = matin->factortype; 2815 mat->rmap->bs = matin->rmap->bs; 2816 mat->assembled = PETSC_TRUE; 2817 mat->insertmode = NOT_SET_VALUES; 2818 mat->preallocated = PETSC_TRUE; 2819 2820 a->size = oldmat->size; 2821 a->rank = oldmat->rank; 2822 a->donotstash = oldmat->donotstash; 2823 a->roworiented = oldmat->roworiented; 2824 a->rowindices = 0; 2825 a->rowvalues = 0; 2826 a->getrowactive = PETSC_FALSE; 2827 2828 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2829 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2830 2831 if (oldmat->colmap) { 2832 #if defined (PETSC_USE_CTABLE) 2833 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2834 #else 2835 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2836 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2837 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2838 #endif 
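  /* colmap (and garray, copied just below) translate between global column numbers
     and local columns of the off-diagonal block; deep-copying them keeps the
     duplicate independent of the original matrix */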
2839 } else a->colmap = 0; 2840 if (oldmat->garray) { 2841 PetscInt len; 2842 len = oldmat->B->cmap->n; 2843 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2844 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2845 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2846 } else a->garray = 0; 2847 2848 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2849 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2850 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2851 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2852 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2853 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2854 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2855 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2856 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2857 *newmat = mat; 2858 PetscFunctionReturn(0); 2859 } 2860 2861 #undef __FUNCT__ 2862 #define __FUNCT__ "MatLoad_MPIAIJ" 2863 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 2864 { 2865 Mat A; 2866 PetscScalar *vals,*svals; 2867 MPI_Comm comm = ((PetscObject)viewer)->comm; 2868 MPI_Status status; 2869 PetscErrorCode ierr; 2870 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz; 2871 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2872 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2873 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2874 PetscInt cend,cstart,n,*rowners; 2875 int fd; 2876 2877 PetscFunctionBegin; 2878 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2879 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2880 if (!rank) { 2881 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2882 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2883 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2884 } 2885 2886 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2887 M = header[1]; N = header[2]; 2888 /* determine ownership of all rows */ 2889 m = M/size + ((M % size) > rank); 2890 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2891 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2892 2893 /* First process needs enough room for process with most rows */ 2894 if (!rank) { 2895 mmax = rowners[1]; 2896 for (i=2; i<size; i++) { 2897 mmax = PetscMax(mmax,rowners[i]); 2898 } 2899 } else mmax = m; 2900 2901 rowners[0] = 0; 2902 for (i=2; i<=size; i++) { 2903 rowners[i] += rowners[i-1]; 2904 } 2905 rstart = rowners[rank]; 2906 rend = rowners[rank+1]; 2907 2908 /* distribute row lengths to all processors */ 2909 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2910 if (!rank) { 2911 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2912 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2913 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2914 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2915 for (j=0; j<m; j++) { 2916 procsnz[0] += ourlens[j]; 2917 } 2918 for (i=1; i<size; i++) { 2919 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2920 /* calculate the number of nonzeros on each processor */ 2921 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 
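      /* count the nonzeros owned by process i so that the matching amount of
         column-index and value data can be read and shipped to it below */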
2922 procsnz[i] += rowlengths[j]; 2923 } 2924 mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]); 2925 ierr = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2926 } 2927 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2928 } else { 2929 mpicnt = PetscMPIIntCast(m);CHKERRQ(ierr); 2930 ierr = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2931 } 2932 2933 if (!rank) { 2934 /* determine max buffer needed and allocate it */ 2935 maxnz = 0; 2936 for (i=0; i<size; i++) { 2937 maxnz = PetscMax(maxnz,procsnz[i]); 2938 } 2939 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2940 2941 /* read in my part of the matrix column indices */ 2942 nz = procsnz[0]; 2943 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2944 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2945 2946 /* read in every one elses and ship off */ 2947 for (i=1; i<size; i++) { 2948 nz = procsnz[i]; 2949 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2950 mpicnt = PetscMPIIntCast(nz); 2951 ierr = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2952 } 2953 ierr = PetscFree(cols);CHKERRQ(ierr); 2954 } else { 2955 /* determine buffer space needed for message */ 2956 nz = 0; 2957 for (i=0; i<m; i++) { 2958 nz += ourlens[i]; 2959 } 2960 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2961 2962 /* receive message of column indices*/ 2963 mpicnt = PetscMPIIntCast(nz);CHKERRQ(ierr); 2964 ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2965 ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr); 2966 if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 2967 else if (mpimaxnz < 0) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);} 2968 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 2969 } 2970 2971 /* determine column ownership if matrix is not square */ 2972 if (N != M) { 2973 n = N/size + ((N % size) > rank); 2974 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2975 cstart = cend - n; 2976 } else { 2977 cstart = rstart; 2978 cend = rend; 2979 n = cend - cstart; 2980 } 2981 2982 /* loop over local rows, determining number of off diagonal entries */ 2983 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2984 jj = 0; 2985 for (i=0; i<m; i++) { 2986 for (j=0; j<ourlens[i]; j++) { 2987 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2988 jj++; 2989 } 2990 } 2991 2992 /* create our matrix */ 2993 for (i=0; i<m; i++) { 2994 ourlens[i] -= offlens[i]; 2995 } 2996 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2997 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2998 ierr = MatSetType(A,type);CHKERRQ(ierr); 2999 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 3000 3001 for (i=0; i<m; i++) { 3002 ourlens[i] += offlens[i]; 3003 } 3004 3005 if (!rank) { 3006 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3007 3008 /* read in my part of the matrix numerical values */ 3009 nz = procsnz[0]; 3010 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3011 3012 /* insert into matrix */ 3013 jj = rstart; 3014 smycols = mycols; 3015 svals = vals; 3016 for (i=0; i<m; i++) { 3017 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 
3018 smycols += ourlens[i]; 3019 svals += ourlens[i]; 3020 jj++; 3021 } 3022 3023 /* read in other processors and ship out */ 3024 for (i=1; i<size; i++) { 3025 nz = procsnz[i]; 3026 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3027 mpicnt = PetscMPIIntCast(nz); 3028 ierr = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 3029 } 3030 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3031 } else { 3032 /* receive numeric values */ 3033 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3034 3035 /* receive message of values*/ 3036 mpicnt = PetscMPIIntCast(nz); 3037 ierr = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 3038 ierr = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr); 3039 if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 3040 else if (mpimaxnz < 0) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);} 3041 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 3042 3043 /* insert into matrix */ 3044 jj = rstart; 3045 smycols = mycols; 3046 svals = vals; 3047 for (i=0; i<m; i++) { 3048 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3049 smycols += ourlens[i]; 3050 svals += ourlens[i]; 3051 jj++; 3052 } 3053 } 3054 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3055 ierr = PetscFree(vals);CHKERRQ(ierr); 3056 ierr = PetscFree(mycols);CHKERRQ(ierr); 3057 ierr = PetscFree(rowners);CHKERRQ(ierr); 3058 3059 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3060 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3061 *newmat = A; 3062 PetscFunctionReturn(0); 3063 } 3064 3065 #undef __FUNCT__ 3066 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3067 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3068 { 3069 PetscErrorCode ierr; 3070 IS iscol_local; 3071 PetscInt csize; 3072 3073 PetscFunctionBegin; 3074 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3075 if (call == MAT_REUSE_MATRIX) { 3076 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3077 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3078 } else { 3079 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3080 } 3081 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3082 if (call == MAT_INITIAL_MATRIX) { 3083 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3084 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 3085 } 3086 PetscFunctionReturn(0); 3087 } 3088 3089 #undef __FUNCT__ 3090 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3091 /* 3092 Not great since it makes two copies of the submatrix, first an SeqAIJ 3093 in local and then by concatenating the local matrices the end result. 3094 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3095 3096 Note: This requires a sequential iscol with all indices. 
3097 */ 3098 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3099 { 3100 PetscErrorCode ierr; 3101 PetscMPIInt rank,size; 3102 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3103 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3104 Mat *local,M,Mreuse; 3105 MatScalar *vwork,*aa; 3106 MPI_Comm comm = ((PetscObject)mat)->comm; 3107 Mat_SeqAIJ *aij; 3108 3109 3110 PetscFunctionBegin; 3111 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3112 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3113 3114 if (call == MAT_REUSE_MATRIX) { 3115 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3116 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3117 local = &Mreuse; 3118 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3119 } else { 3120 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3121 Mreuse = *local; 3122 ierr = PetscFree(local);CHKERRQ(ierr); 3123 } 3124 3125 /* 3126 m - number of local rows 3127 n - number of columns (same on all processors) 3128 rstart - first row in new global matrix generated 3129 */ 3130 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3131 if (call == MAT_INITIAL_MATRIX) { 3132 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3133 ii = aij->i; 3134 jj = aij->j; 3135 3136 /* 3137 Determine the number of non-zeros in the diagonal and off-diagonal 3138 portions of the matrix in order to do correct preallocation 3139 */ 3140 3141 /* first get start and end of "diagonal" columns */ 3142 if (csize == PETSC_DECIDE) { 3143 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3144 if (mglobal == n) { /* square matrix */ 3145 nlocal = m; 3146 } else { 3147 nlocal = n/size + ((n % size) > rank); 3148 } 3149 } else { 3150 nlocal = csize; 3151 } 3152 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3153 rstart = rend - nlocal; 3154 if (rank == size - 1 && rend != n) { 3155 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3156 } 3157 3158 /* next, compute all the lengths */ 3159 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3160 olens = dlens + m; 3161 for (i=0; i<m; i++) { 3162 jend = ii[i+1] - ii[i]; 3163 olen = 0; 3164 dlen = 0; 3165 for (j=0; j<jend; j++) { 3166 if (*jj < rstart || *jj >= rend) olen++; 3167 else dlen++; 3168 jj++; 3169 } 3170 olens[i] = olen; 3171 dlens[i] = dlen; 3172 } 3173 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3174 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3175 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3176 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3177 ierr = PetscFree(dlens);CHKERRQ(ierr); 3178 } else { 3179 PetscInt ml,nl; 3180 3181 M = *newmat; 3182 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3183 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3184 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3185 /* 3186 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3187 rather than the slower MatSetValues(). 
3188 */ 3189 M->was_assembled = PETSC_TRUE; 3190 M->assembled = PETSC_FALSE; 3191 } 3192 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3193 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3194 ii = aij->i; 3195 jj = aij->j; 3196 aa = aij->a; 3197 for (i=0; i<m; i++) { 3198 row = rstart + i; 3199 nz = ii[i+1] - ii[i]; 3200 cwork = jj; jj += nz; 3201 vwork = aa; aa += nz; 3202 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3203 } 3204 3205 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3206 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3207 *newmat = M; 3208 3209 /* save submatrix used in processor for next request */ 3210 if (call == MAT_INITIAL_MATRIX) { 3211 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3212 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 3213 } 3214 3215 PetscFunctionReturn(0); 3216 } 3217 3218 EXTERN_C_BEGIN 3219 #undef __FUNCT__ 3220 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3221 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3222 { 3223 PetscInt m,cstart, cend,j,nnz,i,d; 3224 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3225 const PetscInt *JJ; 3226 PetscScalar *values; 3227 PetscErrorCode ierr; 3228 3229 PetscFunctionBegin; 3230 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3231 3232 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3233 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3234 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3235 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3236 m = B->rmap->n; 3237 cstart = B->cmap->rstart; 3238 cend = B->cmap->rend; 3239 rstart = B->rmap->rstart; 3240 3241 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3242 3243 #if defined(PETSC_USE_DEBUG) 3244 for (i=0; i<m; i++) { 3245 nnz = Ii[i+1]- Ii[i]; 3246 JJ = J + Ii[i]; 3247 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz); 3248 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3249 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3250 } 3251 #endif 3252 3253 for (i=0; i<m; i++) { 3254 nnz = Ii[i+1]- Ii[i]; 3255 JJ = J + Ii[i]; 3256 nnz_max = PetscMax(nnz_max,nnz); 3257 d = 0; 3258 for (j=0; j<nnz; j++) { 3259 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3260 } 3261 d_nnz[i] = d; 3262 o_nnz[i] = nnz - d; 3263 } 3264 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3265 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3266 3267 if (v) values = (PetscScalar*)v; 3268 else { 3269 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3270 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3271 } 3272 3273 for (i=0; i<m; i++) { 3274 ii = i + rstart; 3275 nnz = Ii[i+1]- Ii[i]; 3276 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3277 } 3278 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3279 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3280 3281 if (!v) { 3282 ierr = PetscFree(values);CHKERRQ(ierr); 3283 } 3284 PetscFunctionReturn(0); 3285 } 3286 EXTERN_C_END 3287 3288 #undef __FUNCT__ 3289 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3290 /*@ 3291 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3292 (the default parallel PETSc format). 3293 3294 Collective on MPI_Comm 3295 3296 Input Parameters: 3297 + B - the matrix 3298 . i - the indices into j for the start of each local row (starts with zero) 3299 . j - the column indices for each local row (starts with zero) 3300 - v - optional values in the matrix 3301 3302 Level: developer 3303 3304 Notes: 3305 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3306 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3307 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3308 3309 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3310 3311 The format which is used for the sparse matrix input, is equivalent to a 3312 row-major ordering.. i.e for the following matrix, the input data expected is 3313 as shown: 3314 3315 1 0 0 3316 2 0 3 P0 3317 ------- 3318 4 5 6 P1 3319 3320 Process0 [P0]: rows_owned=[0,1] 3321 i = {0,1,3} [size = nrow+1 = 2+1] 3322 j = {0,0,2} [size = nz = 6] 3323 v = {1,2,3} [size = nz = 6] 3324 3325 Process1 [P1]: rows_owned=[2] 3326 i = {0,3} [size = nrow+1 = 1+1] 3327 j = {0,1,2} [size = nz = 6] 3328 v = {4,5,6} [size = nz = 6] 3329 3330 .keywords: matrix, aij, compressed row, sparse, parallel 3331 3332 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3333 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3334 @*/ 3335 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3336 { 3337 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 3338 3339 PetscFunctionBegin; 3340 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3341 if (f) { 3342 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 3343 } 3344 PetscFunctionReturn(0); 3345 } 3346 3347 #undef __FUNCT__ 3348 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3349 /*@C 3350 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3351 (the default parallel PETSc format). For good matrix assembly performance 3352 the user should preallocate the matrix storage by setting the parameters 3353 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3354 performance can be increased by more than a factor of 50. 3355 3356 Collective on MPI_Comm 3357 3358 Input Parameters: 3359 + A - the matrix 3360 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3361 (same value is used for all local rows) 3362 . d_nnz - array containing the number of nonzeros in the various rows of the 3363 DIAGONAL portion of the local submatrix (possibly different for each row) 3364 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3365 The size of this array is equal to the number of local rows, i.e 'm'. 
3366 You must leave room for the diagonal entry even if it is zero. 3367 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3368 submatrix (same value is used for all local rows). 3369 - o_nnz - array containing the number of nonzeros in the various rows of the 3370 OFF-DIAGONAL portion of the local submatrix (possibly different for 3371 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3372 structure. The size of this array is equal to the number 3373 of local rows, i.e 'm'. 3374 3375 If the *_nnz parameter is given then the *_nz parameter is ignored. 3376 3377 The AIJ format (also called the Yale sparse matrix format or 3378 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3379 storage. The stored row and column indices begin with zero. See the users manual for details. 3380 3381 The parallel matrix is partitioned such that the first m0 rows belong to 3382 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3383 to process 2 etc., where m0,m1,m2... are the input parameter 'm'. 3384 3385 The DIAGONAL portion of the local submatrix of a processor can be defined 3386 as the submatrix which is obtained by extracting the part corresponding 3387 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 3388 first row that belongs to the processor, and r2 is the last row belonging 3389 to this processor. This is a square mxm matrix. The remaining portion 3390 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 3391 3392 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 3393 3394 You can call MatGetInfo() to get information on how effective the preallocation was; 3395 for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded. 3396 You can also run with the option -info and look for messages with the string 3397 malloc in them to see if additional memory allocation was needed. 3398 3399 Example usage: 3400 3401 Consider the following 8x8 matrix with 34 non-zero values, that is 3402 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3403 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3404 as follows: 3405 3406 .vb 3407 1 2 0 | 0 3 0 | 0 4 3408 Proc0 0 5 6 | 7 0 0 | 8 0 3409 9 0 10 | 11 0 0 | 12 0 3410 ------------------------------------- 3411 13 0 14 | 15 16 17 | 0 0 3412 Proc1 0 18 0 | 19 20 21 | 0 0 3413 0 0 0 | 22 23 0 | 24 0 3414 ------------------------------------- 3415 Proc2 25 26 27 | 0 0 28 | 29 0 3416 30 0 0 | 31 32 33 | 0 34 3417 .ve 3418 3419 This can be represented as a collection of submatrices as: 3420 3421 .vb 3422 A B C 3423 D E F 3424 G H I 3425 .ve 3426 3427 Here the submatrices A,B,C are owned by proc0, D,E,F are 3428 owned by proc1, and G,H,I are owned by proc2. 3429 3430 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3431 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3432 The 'M','N' parameters are 8,8, and have the same values on all procs. 3433 3434 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3435 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3436 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3437 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 3438 part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ 3439 matrix, and [DF] as another SeqAIJ matrix.
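   As an illustrative sketch only (the communicator, sizes, and the d_nnz/o_nnz arrays here are placeholders; the per-row counts for the example above are worked out below), a typical creation sequence that ends with this routine is:
.vb
      Mat A;
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);                          /* comm is the MPI communicator */
      ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);                      /* local and global sizes */
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);/* per-row nonzero counts */
      /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
.ve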
3440 3441 When d_nz, o_nz parameters are specified, d_nz storage elements are 3442 allocated for every row of the local diagonal submatrix, and o_nz 3443 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3444 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 3445 local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively. 3446 In this case, the values of d_nz,o_nz are: 3447 .vb 3448 proc0 : d_nz = 2, o_nz = 2 3449 proc1 : d_nz = 3, o_nz = 2 3450 proc2 : d_nz = 1, o_nz = 4 3451 .ve 3452 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3453 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3454 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3455 34 values. 3456 3457 When d_nnz, o_nnz parameters are specified, the storage is specified 3458 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3459 In the above case the values for d_nnz,o_nnz are: 3460 .vb 3461 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3462 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3463 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3464 .ve 3465 Here the space allocated is the sum of all of the above values, i.e. 34, so 3466 the preallocation is exact. 3467 3468 Level: intermediate 3469 3470 .keywords: matrix, aij, compressed row, sparse, parallel 3471 3472 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3473 MPIAIJ, MatGetInfo() 3474 @*/ 3475 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3476 { 3477 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3478 3479 PetscFunctionBegin; 3480 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3481 if (f) { 3482 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3483 } 3484 PetscFunctionReturn(0); 3485 } 3486 3487 #undef __FUNCT__ 3488 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3489 /*@ 3490 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local 3491 rows in standard CSR format. 3492 3493 Collective on MPI_Comm 3494 3495 Input Parameters: 3496 + comm - MPI communicator 3497 . m - number of local rows (cannot be PETSC_DECIDE) 3498 . n - This value should be the same as the local size used in creating the 3499 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have it 3500 calculated if N is given) For square matrices n is almost always m. 3501 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3502 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3503 . i - row indices 3504 . j - column indices 3505 - a - matrix values 3506 3507 Output Parameter: 3508 . mat - the matrix 3509 3510 Level: intermediate 3511 3512 Notes: 3513 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3514 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3515 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3516 3517 The i and j indices are 0 based, and the i indices are offsets into the local j array. 3518 3519 The format used for the sparse matrix input is equivalent to a 3520 row-major ordering,
i.e for the following matrix, the input data expected is 3521 as shown: 3522 3523 1 0 0 3524 2 0 3 P0 3525 ------- 3526 4 5 6 P1 3527 3528 Process0 [P0]: rows_owned=[0,1] 3529 i = {0,1,3} [size = nrow+1 = 2+1] 3530 j = {0,0,2} [size = nz = 6] 3531 v = {1,2,3} [size = nz = 6] 3532 3533 Process1 [P1]: rows_owned=[2] 3534 i = {0,3} [size = nrow+1 = 1+1] 3535 j = {0,1,2} [size = nz = 6] 3536 v = {4,5,6} [size = nz = 6] 3537 3538 .keywords: matrix, aij, compressed row, sparse, parallel 3539 3540 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3541 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3542 @*/ 3543 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3544 { 3545 PetscErrorCode ierr; 3546 3547 PetscFunctionBegin; 3548 if (i[0]) { 3549 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3550 } 3551 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3552 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3553 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3554 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3555 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3556 PetscFunctionReturn(0); 3557 } 3558 3559 #undef __FUNCT__ 3560 #define __FUNCT__ "MatCreateMPIAIJ" 3561 /*@C 3562 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3563 (the default parallel PETSc format). For good matrix assembly performance 3564 the user should preallocate the matrix storage by setting the parameters 3565 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3566 performance can be increased by more than a factor of 50. 3567 3568 Collective on MPI_Comm 3569 3570 Input Parameters: 3571 + comm - MPI communicator 3572 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3573 This value should be the same as the local size used in creating the 3574 y vector for the matrix-vector product y = Ax. 3575 . n - This value should be the same as the local size used in creating the 3576 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3577 calculated if N is given) For square matrices n is almost always m. 3578 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3579 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3580 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3581 (same value is used for all local rows) 3582 . d_nnz - array containing the number of nonzeros in the various rows of the 3583 DIAGONAL portion of the local submatrix (possibly different for each row) 3584 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3585 The size of this array is equal to the number of local rows, i.e 'm'. 3586 You must leave room for the diagonal entry even if it is zero. 3587 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3588 submatrix (same value is used for all local rows). 3589 - o_nnz - array containing the number of nonzeros in the various rows of the 3590 OFF-DIAGONAL portion of the local submatrix (possibly different for 3591 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3592 structure. The size of this array is equal to the number 3593 of local rows, i.e 'm'. 
3594 3595 Output Parameter: 3596 . A - the matrix 3597 3598 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3599 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3600 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()] 3601 3602 Notes: 3603 If the *_nnz parameter is given then the *_nz parameter is ignored. 3604 3605 The m,n,M,N parameters specify the size of the matrix, and its partitioning across 3606 processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3607 storage requirements for this matrix. 3608 3609 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3610 processor then it must be used on all processors that share the object for 3611 that argument. 3612 3613 The user MUST specify either the local or global matrix dimensions 3614 (possibly both). 3615 3616 The parallel matrix is partitioned across processors such that the 3617 first m0 rows belong to process 0, the next m1 rows belong to 3618 process 1, the next m2 rows belong to process 2 etc., where 3619 m0,m1,m2,.. are the input parameter 'm'. That is, each processor stores 3620 values corresponding to an [m x N] submatrix. 3621 3622 The columns are logically partitioned with the n0 columns belonging 3623 to the 0th partition, the next n1 columns belonging to the next 3624 partition etc., where n0,n1,n2... are the input parameter 'n'. 3625 3626 The DIAGONAL portion of the local submatrix on any given processor 3627 is the submatrix corresponding to the rows and columns m,n 3628 owned by that processor; i.e. the diagonal submatrix on 3629 process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1], 3630 etc. The remaining portion of the local submatrix, [m x (N-n)], 3631 constitutes the OFF-DIAGONAL portion. The example below 3632 illustrates this concept. 3633 3634 For a square global matrix we define each processor's diagonal portion 3635 to be its local rows and the corresponding columns (a square submatrix); 3636 each processor's off-diagonal portion encompasses the remainder of the 3637 local matrix (a rectangular submatrix). 3638 3639 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 3640 3641 When calling this routine with a single process communicator, a matrix of 3642 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3643 type of communicator, use the construction mechanism: 3644 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3645 3646 By default, this format uses inodes (identical nodes) when possible. 3647 We search for consecutive rows with the same nonzero structure, thereby 3648 reusing matrix information to achieve increased efficiency. 3649 3650 Options Database Keys: 3651 + -mat_no_inode - Do not use inodes 3652 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3653 - -mat_aij_oneindex - Internally use indexing starting at 1 3654 rather than 0. Note that when calling MatSetValues(), 3655 the user still MUST index entries starting at 0! 3656 3657 3658 Example usage: 3659 3660 Consider the following 8x8 matrix with 34 non-zero values, that is 3661 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3662 proc1 owns 3 rows, proc2 owns 2 rows.
This division can be shown 3663 as follows: 3664 3665 .vb 3666 1 2 0 | 0 3 0 | 0 4 3667 Proc0 0 5 6 | 7 0 0 | 8 0 3668 9 0 10 | 11 0 0 | 12 0 3669 ------------------------------------- 3670 13 0 14 | 15 16 17 | 0 0 3671 Proc1 0 18 0 | 19 20 21 | 0 0 3672 0 0 0 | 22 23 0 | 24 0 3673 ------------------------------------- 3674 Proc2 25 26 27 | 0 0 28 | 29 0 3675 30 0 0 | 31 32 33 | 0 34 3676 .ve 3677 3678 This can be represented as a collection of submatrices as: 3679 3680 .vb 3681 A B C 3682 D E F 3683 G H I 3684 .ve 3685 3686 Here the submatrices A,B,C are owned by proc0, D,E,F are 3687 owned by proc1, and G,H,I are owned by proc2. 3688 3689 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3690 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3691 The 'M','N' parameters are 8,8, and have the same values on all procs. 3692 3693 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3694 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3695 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3696 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 3697 part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ 3698 matrix, and [DF] as another SeqAIJ matrix. 3699 3700 When d_nz, o_nz parameters are specified, d_nz storage elements are 3701 allocated for every row of the local diagonal submatrix, and o_nz 3702 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3703 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 3704 local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively. 3705 In this case, the values of d_nz,o_nz are: 3706 .vb 3707 proc0 : d_nz = 2, o_nz = 2 3708 proc1 : d_nz = 3, o_nz = 2 3709 proc2 : d_nz = 1, o_nz = 4 3710 .ve 3711 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3712 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3713 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3714 34 values. 3715 3716 When d_nnz, o_nnz parameters are specified, the storage is specified 3717 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3718 In the above case the values for d_nnz,o_nnz are: 3719 .vb 3720 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3721 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3722 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3723 .ve 3724 Here the space allocated is the sum of all of the above values, i.e. 34, so 3725 the preallocation is exact.
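   As an illustrative sketch (the variable names are placeholders, not taken from a PETSc example program), the call that proc1 in the example above might make, using the per-row counts just listed, is:
.vb
      Mat      A;
      PetscInt d_nnz[] = {3,3,2};   /* DIAGONAL-part nonzeros for proc1's three local rows */
      PetscInt o_nnz[] = {2,1,1};   /* OFF-DIAGONAL-part nonzeros for the same rows        */
      ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
   Each process makes the analogous call with its own m, n, d_nnz, and o_nnz.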
3726 3727 Level: intermediate 3728 3729 .keywords: matrix, aij, compressed row, sparse, parallel 3730 3731 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3732 MPIAIJ, MatCreateMPIAIJWithArrays() 3733 @*/ 3734 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3735 { 3736 PetscErrorCode ierr; 3737 PetscMPIInt size; 3738 3739 PetscFunctionBegin; 3740 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3741 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3742 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3743 if (size > 1) { 3744 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3745 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3746 } else { 3747 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3748 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3749 } 3750 PetscFunctionReturn(0); 3751 } 3752 3753 #undef __FUNCT__ 3754 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3755 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3756 { 3757 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3758 3759 PetscFunctionBegin; 3760 *Ad = a->A; 3761 *Ao = a->B; 3762 *colmap = a->garray; 3763 PetscFunctionReturn(0); 3764 } 3765 3766 #undef __FUNCT__ 3767 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3768 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3769 { 3770 PetscErrorCode ierr; 3771 PetscInt i; 3772 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3773 3774 PetscFunctionBegin; 3775 if (coloring->ctype == IS_COLORING_GLOBAL) { 3776 ISColoringValue *allcolors,*colors; 3777 ISColoring ocoloring; 3778 3779 /* set coloring for diagonal portion */ 3780 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3781 3782 /* set coloring for off-diagonal portion */ 3783 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3784 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3785 for (i=0; i<a->B->cmap->n; i++) { 3786 colors[i] = allcolors[a->garray[i]]; 3787 } 3788 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3789 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3790 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3791 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3792 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3793 ISColoringValue *colors; 3794 PetscInt *larray; 3795 ISColoring ocoloring; 3796 3797 /* set coloring for diagonal portion */ 3798 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3799 for (i=0; i<a->A->cmap->n; i++) { 3800 larray[i] = i + A->cmap->rstart; 3801 } 3802 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3803 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3804 for (i=0; i<a->A->cmap->n; i++) { 3805 colors[i] = coloring->colors[larray[i]]; 3806 } 3807 ierr = PetscFree(larray);CHKERRQ(ierr); 3808 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3809 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3810 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3811 3812 /* set coloring for off-diagonal portion */ 3813 ierr = 
PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3814 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3815 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3816 for (i=0; i<a->B->cmap->n; i++) { 3817 colors[i] = coloring->colors[larray[i]]; 3818 } 3819 ierr = PetscFree(larray);CHKERRQ(ierr); 3820 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3821 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3822 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3823 } else { 3824 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype); 3825 } 3826 3827 PetscFunctionReturn(0); 3828 } 3829 3830 #if defined(PETSC_HAVE_ADIC) 3831 #undef __FUNCT__ 3832 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3833 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3834 { 3835 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3836 PetscErrorCode ierr; 3837 3838 PetscFunctionBegin; 3839 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3840 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3841 PetscFunctionReturn(0); 3842 } 3843 #endif 3844 3845 #undef __FUNCT__ 3846 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3847 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3848 { 3849 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3850 PetscErrorCode ierr; 3851 3852 PetscFunctionBegin; 3853 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3854 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3855 PetscFunctionReturn(0); 3856 } 3857 3858 #undef __FUNCT__ 3859 #define __FUNCT__ "MatMerge" 3860 /*@ 3861 MatMerge - Creates a single large parallel PETSc matrix by concatenating the sequential 3862 matrices from each processor 3863 3864 Collective on MPI_Comm 3865 3866 Input Parameters: 3867 + comm - the communicator the parallel matrix will live on 3868 . inmat - the input sequential matrix (one per process) 3869 . n - number of local columns (or PETSC_DECIDE) 3870 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3871 3872 Output Parameter: 3873 . outmat - the parallel matrix generated 3874 3875 Level: advanced 3876 3877 Notes: The number of columns of the matrix in EACH processor MUST be the same.
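   A minimal usage sketch (the matrix names localmat and globalmat are placeholders): each process builds its own sequential matrix and then all processes call
.vb
      ierr = MatMerge(PETSC_COMM_WORLD,localmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&globalmat);CHKERRQ(ierr);
.ve
   Note that, as the implementation below shows, the input matrix inmat is destroyed by this routine once its values have been copied into outmat.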
3878 3879 @*/ 3880 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3881 { 3882 PetscErrorCode ierr; 3883 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3884 PetscInt *indx; 3885 PetscScalar *values; 3886 3887 PetscFunctionBegin; 3888 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3889 if (scall == MAT_INITIAL_MATRIX){ 3890 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3891 if (n == PETSC_DECIDE){ 3892 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3893 } 3894 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3895 rstart -= m; 3896 3897 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3898 for (i=0;i<m;i++) { 3899 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3900 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3901 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3902 } 3903 /* This routine will ONLY return MPIAIJ type matrix */ 3904 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3905 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3906 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3907 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3908 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3909 3910 } else if (scall == MAT_REUSE_MATRIX){ 3911 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3912 } else { 3913 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3914 } 3915 3916 for (i=0;i<m;i++) { 3917 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3918 Ii = i + rstart; 3919 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3920 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3921 } 3922 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3923 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3924 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3925 3926 PetscFunctionReturn(0); 3927 } 3928 3929 #undef __FUNCT__ 3930 #define __FUNCT__ "MatFileSplit" 3931 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3932 { 3933 PetscErrorCode ierr; 3934 PetscMPIInt rank; 3935 PetscInt m,N,i,rstart,nnz; 3936 size_t len; 3937 const PetscInt *indx; 3938 PetscViewer out; 3939 char *name; 3940 Mat B; 3941 const PetscScalar *values; 3942 3943 PetscFunctionBegin; 3944 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3945 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3946 /* Should this be the type of the diagonal block of A? 
*/ 3947 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3948 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3949 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3950 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3951 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3952 for (i=0;i<m;i++) { 3953 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3954 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3955 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3956 } 3957 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3958 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3959 3960 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 3961 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3962 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3963 sprintf(name,"%s.%d",outfile,rank); 3964 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3965 ierr = PetscFree(name);CHKERRQ(ierr); 3966 ierr = MatView(B,out);CHKERRQ(ierr); 3967 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3968 ierr = MatDestroy(B);CHKERRQ(ierr); 3969 PetscFunctionReturn(0); 3970 } 3971 3972 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3973 #undef __FUNCT__ 3974 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3975 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3976 { 3977 PetscErrorCode ierr; 3978 Mat_Merge_SeqsToMPI *merge; 3979 PetscContainer container; 3980 3981 PetscFunctionBegin; 3982 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3983 if (container) { 3984 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3985 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3986 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3987 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3988 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3989 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3990 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 3991 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3992 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 3993 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3994 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3995 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3996 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3997 ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr); 3998 3999 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 4000 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4001 ierr = PetscFree(merge);CHKERRQ(ierr); /* free merge only when it was actually attached; merge is uninitialized otherwise */ 4002 } 4003 4004 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4005 PetscFunctionReturn(0); 4006 } 4007 4008 #include "../src/mat/utils/freespace.h" 4009 #include "petscbt.h" 4010 4011 #undef __FUNCT__ 4012 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4013 /*@C 4014 MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together sequential 4015 matrices from each processor 4016 4017 Collective on MPI_Comm 4018 4019 Input Parameters: 4020 + comm - the communicator the parallel matrix will live on 4021 . seqmat - the input sequential matrix (one per process) 4022 . m - number of local rows (or PETSC_DECIDE) 4023 . n - number of local columns (or PETSC_DECIDE) 4024 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4025 4026 Output Parameter: 4027 . mpimat - the parallel matrix generated 4028 4029 Level: advanced 4030 4031 Notes: 4032 The dimensions of the sequential matrix in each processor MUST be the same.
4033 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4034 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4035 @*/ 4036 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4037 { 4038 PetscErrorCode ierr; 4039 MPI_Comm comm=((PetscObject)mpimat)->comm; 4040 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4041 PetscMPIInt size,rank,taga,*len_s; 4042 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4043 PetscInt proc,m; 4044 PetscInt **buf_ri,**buf_rj; 4045 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4046 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4047 MPI_Request *s_waits,*r_waits; 4048 MPI_Status *status; 4049 MatScalar *aa=a->a; 4050 MatScalar **abuf_r,*ba_i; 4051 Mat_Merge_SeqsToMPI *merge; 4052 PetscContainer container; 4053 4054 PetscFunctionBegin; 4055 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4056 4057 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4058 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4059 4060 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4061 if (container) { 4062 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4063 } 4064 bi = merge->bi; 4065 bj = merge->bj; 4066 buf_ri = merge->buf_ri; 4067 buf_rj = merge->buf_rj; 4068 4069 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4070 owners = merge->rowmap->range; 4071 len_s = merge->len_s; 4072 4073 /* send and recv matrix values */ 4074 /*-----------------------------*/ 4075 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4076 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4077 4078 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4079 for (proc=0,k=0; proc<size; proc++){ 4080 if (!len_s[proc]) continue; 4081 i = owners[proc]; 4082 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4083 k++; 4084 } 4085 4086 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4087 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4088 ierr = PetscFree(status);CHKERRQ(ierr); 4089 4090 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4091 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4092 4093 /* insert mat values of mpimat */ 4094 /*----------------------------*/ 4095 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4096 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4097 4098 for (k=0; k<merge->nrecv; k++){ 4099 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4100 nrows = *(buf_ri_k[k]); 4101 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4102 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4103 } 4104 4105 /* set values of ba */ 4106 m = merge->rowmap->n; 4107 for (i=0; i<m; i++) { 4108 arow = owners[rank] + i; 4109 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4110 bnzi = bi[i+1] - bi[i]; 4111 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4112 4113 /* add local non-zero vals of this proc's seqmat into ba */ 4114 anzi = ai[arow+1] - ai[arow]; 4115 aj = a->j + ai[arow]; 4116 aa = a->a + ai[arow]; 4117 nextaj = 0; 4118 for (j=0; nextaj<anzi; j++){ 4119 if (*(bj_i 
+ j) == aj[nextaj]){ /* bcol == acol */ 4120 ba_i[j] += aa[nextaj++]; 4121 } 4122 } 4123 4124 /* add received vals into ba */ 4125 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4126 /* i-th row */ 4127 if (i == *nextrow[k]) { 4128 anzi = *(nextai[k]+1) - *nextai[k]; 4129 aj = buf_rj[k] + *(nextai[k]); 4130 aa = abuf_r[k] + *(nextai[k]); 4131 nextaj = 0; 4132 for (j=0; nextaj<anzi; j++){ 4133 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4134 ba_i[j] += aa[nextaj++]; 4135 } 4136 } 4137 nextrow[k]++; nextai[k]++; 4138 } 4139 } 4140 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4141 } 4142 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4143 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4144 4145 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4146 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4147 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4148 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4149 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4150 PetscFunctionReturn(0); 4151 } 4152 4153 #undef __FUNCT__ 4154 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4155 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4156 { 4157 PetscErrorCode ierr; 4158 Mat B_mpi; 4159 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4160 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4161 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4162 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4163 PetscInt len,proc,*dnz,*onz; 4164 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4165 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4166 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4167 MPI_Status *status; 4168 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4169 PetscBT lnkbt; 4170 Mat_Merge_SeqsToMPI *merge; 4171 PetscContainer container; 4172 4173 PetscFunctionBegin; 4174 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4175 4176 /* make sure it is a PETSc comm */ 4177 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4178 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4179 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4180 4181 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4182 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4183 4184 /* determine row ownership */ 4185 /*---------------------------------------------------------*/ 4186 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4187 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4188 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4189 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4190 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4191 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4192 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4193 4194 m = merge->rowmap->n; 4195 M = merge->rowmap->N; 4196 owners = merge->rowmap->range; 4197 4198 /* determine the number of messages to send, their lengths */ 4199 /*---------------------------------------------------------*/ 4200 len_s = merge->len_s; 4201 4202 len = 0; /* length of buf_si[] */ 4203 merge->nsend = 0; 4204 for (proc=0; proc<size; proc++){ 4205 len_si[proc] = 0; 4206 if (proc == rank){ 4207 len_s[proc] = 0; 4208 } else { 4209 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4210 
len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4211 } 4212 if (len_s[proc]) { 4213 merge->nsend++; 4214 nrows = 0; 4215 for (i=owners[proc]; i<owners[proc+1]; i++){ 4216 if (ai[i+1] > ai[i]) nrows++; 4217 } 4218 len_si[proc] = 2*(nrows+1); 4219 len += len_si[proc]; 4220 } 4221 } 4222 4223 /* determine the number and length of messages to receive for ij-structure */ 4224 /*-------------------------------------------------------------------------*/ 4225 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4226 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4227 4228 /* post the Irecv of j-structure */ 4229 /*-------------------------------*/ 4230 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4231 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4232 4233 /* post the Isend of j-structure */ 4234 /*--------------------------------*/ 4235 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4236 4237 for (proc=0, k=0; proc<size; proc++){ 4238 if (!len_s[proc]) continue; 4239 i = owners[proc]; 4240 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4241 k++; 4242 } 4243 4244 /* receives and sends of j-structure are complete */ 4245 /*------------------------------------------------*/ 4246 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4247 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4248 4249 /* send and recv i-structure */ 4250 /*---------------------------*/ 4251 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4252 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4253 4254 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4255 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4256 for (proc=0,k=0; proc<size; proc++){ 4257 if (!len_s[proc]) continue; 4258 /* form outgoing message for i-structure: 4259 buf_si[0]: nrows to be sent 4260 [1:nrows]: row index (global) 4261 [nrows+1:2*nrows+1]: i-structure index 4262 */ 4263 /*-------------------------------------------*/ 4264 nrows = len_si[proc]/2 - 1; 4265 buf_si_i = buf_si + nrows+1; 4266 buf_si[0] = nrows; 4267 buf_si_i[0] = 0; 4268 nrows = 0; 4269 for (i=owners[proc]; i<owners[proc+1]; i++){ 4270 anzi = ai[i+1] - ai[i]; 4271 if (anzi) { 4272 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4273 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4274 nrows++; 4275 } 4276 } 4277 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4278 k++; 4279 buf_si += len_si[proc]; 4280 } 4281 4282 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4283 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4284 4285 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4286 for (i=0; i<merge->nrecv; i++){ 4287 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4288 } 4289 4290 ierr = PetscFree(len_si);CHKERRQ(ierr); 4291 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4292 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4293 ierr = 
PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4294 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4295 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4296 ierr = PetscFree(status);CHKERRQ(ierr); 4297 4298 /* compute a local seq matrix in each processor */ 4299 /*----------------------------------------------*/ 4300 /* allocate bi array and free space for accumulating nonzero column info */ 4301 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4302 bi[0] = 0; 4303 4304 /* create and initialize a linked list */ 4305 nlnk = N+1; 4306 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4307 4308 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4309 len = 0; 4310 len = ai[owners[rank+1]] - ai[owners[rank]]; 4311 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4312 current_space = free_space; 4313 4314 /* determine symbolic info for each local row */ 4315 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4316 4317 for (k=0; k<merge->nrecv; k++){ 4318 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 4319 nrows = *buf_ri_k[k]; 4320 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */ 4321 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */ 4322 } 4323 4324 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4325 len = 0; 4326 for (i=0;i<m;i++) { 4327 bnzi = 0; 4328 /* add local non-zero cols of this proc's seqmat into lnk */ 4329 arow = owners[rank] + i; 4330 anzi = ai[arow+1] - ai[arow]; 4331 aj = a->j + ai[arow]; 4332 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4333 bnzi += nlnk; 4334 /* add received col data into lnk */ 4335 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4336 if (i == *nextrow[k]) { /* i-th row */ 4337 anzi = *(nextai[k]+1) - *nextai[k]; 4338 aj = buf_rj[k] + *nextai[k]; 4339 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4340 bnzi += nlnk; 4341 nextrow[k]++; nextai[k]++; 4342 } 4343 } 4344 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4345 4346 /* if free space is not available, make more free space */ 4347 if (current_space->local_remaining<bnzi) { 4348 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr); 4349 nspacedouble++; 4350 } 4351 /* copy data into free space, then initialize lnk */ 4352 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4353 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4354 4355 current_space->array += bnzi; 4356 current_space->local_used += bnzi; 4357 current_space->local_remaining -= bnzi; 4358 4359 bi[i+1] = bi[i] + bnzi; 4360 } 4361 4362 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4363 4364 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4365 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4366 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4367 4368 /* create symbolic parallel matrix B_mpi */ 4369 /*---------------------------------------*/ 4370 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4371 if (n==PETSC_DECIDE) { 4372 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4373 } else { 4374 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4375 } 4376 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4377 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4378 ierr =
MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4379 4380 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4381 B_mpi->assembled = PETSC_FALSE; 4382 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4383 merge->bi = bi; 4384 merge->bj = bj; 4385 merge->buf_ri = buf_ri; 4386 merge->buf_rj = buf_rj; 4387 merge->coi = PETSC_NULL; 4388 merge->coj = PETSC_NULL; 4389 merge->owners_co = PETSC_NULL; 4390 4391 /* attach the supporting struct to B_mpi for reuse */ 4392 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4393 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4394 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4395 *mpimat = B_mpi; 4396 4397 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4398 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4399 PetscFunctionReturn(0); 4400 } 4401 4402 #undef __FUNCT__ 4403 #define __FUNCT__ "MatMerge_SeqsToMPI" 4404 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4405 { 4406 PetscErrorCode ierr; 4407 4408 PetscFunctionBegin; 4409 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4410 if (scall == MAT_INITIAL_MATRIX){ 4411 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4412 } 4413 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4414 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4415 PetscFunctionReturn(0); 4416 } 4417 4418 #undef __FUNCT__ 4419 #define __FUNCT__ "MatGetLocalMat" 4420 /*@ 4421 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4422 4423 Not Collective 4424 4425 Input Parameters: 4426 + A - the matrix 4427 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4428 4429 Output Parameter: 4430 . 
A_loc - the local sequential matrix generated 4431 4432 Level: developer 4433 4434 @*/ 4435 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4436 { 4437 PetscErrorCode ierr; 4438 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4439 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4440 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4441 MatScalar *aa=a->a,*ba=b->a,*cam; 4442 PetscScalar *ca; 4443 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4444 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4445 4446 PetscFunctionBegin; 4447 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4448 if (scall == MAT_INITIAL_MATRIX){ 4449 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4450 ci[0] = 0; 4451 for (i=0; i<am; i++){ 4452 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4453 } 4454 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4455 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4456 k = 0; 4457 for (i=0; i<am; i++) { 4458 ncols_o = bi[i+1] - bi[i]; 4459 ncols_d = ai[i+1] - ai[i]; 4460 /* off-diagonal portion of A */ 4461 for (jo=0; jo<ncols_o; jo++) { 4462 col = cmap[*bj]; 4463 if (col >= cstart) break; 4464 cj[k] = col; bj++; 4465 ca[k++] = *ba++; 4466 } 4467 /* diagonal portion of A */ 4468 for (j=0; j<ncols_d; j++) { 4469 cj[k] = cstart + *aj++; 4470 ca[k++] = *aa++; 4471 } 4472 /* off-diagonal portion of A */ 4473 for (j=jo; j<ncols_o; j++) { 4474 cj[k] = cmap[*bj++]; 4475 ca[k++] = *ba++; 4476 } 4477 } 4478 /* put together the new matrix */ 4479 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4480 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4481 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4482 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4483 mat->free_a = PETSC_TRUE; 4484 mat->free_ij = PETSC_TRUE; 4485 mat->nonew = 0; 4486 } else if (scall == MAT_REUSE_MATRIX){ 4487 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4488 ci = mat->i; cj = mat->j; cam = mat->a; 4489 for (i=0; i<am; i++) { 4490 /* off-diagonal portion of A */ 4491 ncols_o = bi[i+1] - bi[i]; 4492 for (jo=0; jo<ncols_o; jo++) { 4493 col = cmap[*bj]; 4494 if (col >= cstart) break; 4495 *cam++ = *ba++; bj++; 4496 } 4497 /* diagonal portion of A */ 4498 ncols_d = ai[i+1] - ai[i]; 4499 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4500 /* off-diagonal portion of A */ 4501 for (j=jo; j<ncols_o; j++) { 4502 *cam++ = *ba++; bj++; 4503 } 4504 } 4505 } else { 4506 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4507 } 4508 4509 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4510 PetscFunctionReturn(0); 4511 } 4512 4513 #undef __FUNCT__ 4514 #define __FUNCT__ "MatGetLocalMatCondensed" 4515 /*@C 4516 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4517 4518 Not Collective 4519 4520 Input Parameters: 4521 + A - the matrix 4522 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4523 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4524 4525 Output Parameter: 4526 . 
A_loc - the local sequential matrix generated 4527 4528 Level: developer 4529 4530 @*/ 4531 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4532 { 4533 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4534 PetscErrorCode ierr; 4535 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4536 IS isrowa,iscola; 4537 Mat *aloc; 4538 4539 PetscFunctionBegin; 4540 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4541 if (!row){ 4542 start = A->rmap->rstart; end = A->rmap->rend; 4543 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4544 } else { 4545 isrowa = *row; 4546 } 4547 if (!col){ 4548 start = A->cmap->rstart; 4549 cmap = a->garray; 4550 nzA = a->A->cmap->n; 4551 nzB = a->B->cmap->n; 4552 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4553 ncols = 0; 4554 for (i=0; i<nzB; i++) { 4555 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4556 else break; 4557 } 4558 imark = i; 4559 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4560 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4561 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4562 ierr = PetscFree(idx);CHKERRQ(ierr); 4563 } else { 4564 iscola = *col; 4565 } 4566 if (scall != MAT_INITIAL_MATRIX){ 4567 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4568 aloc[0] = *A_loc; 4569 } 4570 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4571 *A_loc = aloc[0]; 4572 ierr = PetscFree(aloc);CHKERRQ(ierr); 4573 if (!row){ 4574 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4575 } 4576 if (!col){ 4577 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4578 } 4579 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4580 PetscFunctionReturn(0); 4581 } 4582 4583 #undef __FUNCT__ 4584 #define __FUNCT__ "MatGetBrowsOfAcols" 4585 /*@C 4586 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B whose indices equal the nonzero column indices of local A 4587 4588 Collective on Mat 4589 4590 Input Parameters: 4591 + A,B - the matrices in mpiaij format 4592 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4593 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4594 4595 Output Parameter: 4596 + rowb, colb - index sets of rows and columns of B to extract 4597 . 
brstart - row index of B_seq from which the next B->rmap->n rows are taken from B's local rows 4598 - B_seq - the sequential matrix generated 4599 4600 Level: developer 4601 4602 @*/ 4603 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4604 { 4605 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4606 PetscErrorCode ierr; 4607 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4608 IS isrowb,iscolb; 4609 Mat *bseq; 4610 4611 PetscFunctionBegin; 4612 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4613 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4614 } 4615 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4616 4617 if (scall == MAT_INITIAL_MATRIX){ 4618 start = A->cmap->rstart; 4619 cmap = a->garray; 4620 nzA = a->A->cmap->n; 4621 nzB = a->B->cmap->n; 4622 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4623 ncols = 0; 4624 for (i=0; i<nzB; i++) { /* row < local row index */ 4625 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4626 else break; 4627 } 4628 imark = i; 4629 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4630 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4631 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4632 ierr = PetscFree(idx);CHKERRQ(ierr); 4633 *brstart = imark; 4634 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4635 } else { 4636 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4637 isrowb = *rowb; iscolb = *colb; 4638 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4639 bseq[0] = *B_seq; 4640 } 4641 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4642 *B_seq = bseq[0]; 4643 ierr = PetscFree(bseq);CHKERRQ(ierr); 4644 if (!rowb){ 4645 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4646 } else { 4647 *rowb = isrowb; 4648 } 4649 if (!colb){ 4650 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4651 } else { 4652 *colb = iscolb; 4653 } 4654 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4655 PetscFunctionReturn(0); 4656 } 4657 4658 #undef __FUNCT__ 4659 #define __FUNCT__ "MatGetBrowsOfAoCols" 4660 /*@C 4661 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B whose indices equal the nonzero column indices 4662 of the OFF-DIAGONAL portion of local A 4663 4664 Collective on Mat 4665 4666 Input Parameters: 4667 + A,B - the matrices in mpiaij format 4668 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4669 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4670 . 
startsj_r - similar to startsj for receives 4671 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4672 4673 Output Parameter: 4674 + B_oth - the sequential matrix generated 4675 4676 Level: developer 4677 4678 @*/ 4679 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 4680 { 4681 VecScatter_MPI_General *gen_to,*gen_from; 4682 PetscErrorCode ierr; 4683 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4684 Mat_SeqAIJ *b_oth; 4685 VecScatter ctx=a->Mvctx; 4686 MPI_Comm comm=((PetscObject)ctx)->comm; 4687 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4688 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4689 PetscScalar *rvalues,*svalues; 4690 MatScalar *b_otha,*bufa,*bufA; 4691 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4692 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4693 MPI_Status *sstatus,rstatus; 4694 PetscMPIInt jj; 4695 PetscInt *cols,sbs,rbs; 4696 PetscScalar *vals; 4697 4698 PetscFunctionBegin; 4699 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4700 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4701 } 4702 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4703 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4704 4705 gen_to = (VecScatter_MPI_General*)ctx->todata; 4706 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4707 rvalues = gen_from->values; /* holds the length of receiving row */ 4708 svalues = gen_to->values; /* holds the length of sending row */ 4709 nrecvs = gen_from->n; 4710 nsends = gen_to->n; 4711 4712 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4713 srow = gen_to->indices; /* local row index to be sent */ 4714 sstarts = gen_to->starts; 4715 sprocs = gen_to->procs; 4716 sstatus = gen_to->sstatus; 4717 sbs = gen_to->bs; 4718 rstarts = gen_from->starts; 4719 rprocs = gen_from->procs; 4720 rbs = gen_from->bs; 4721 4722 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4723 if (scall == MAT_INITIAL_MATRIX){ 4724 /* i-array */ 4725 /*---------*/ 4726 /* post receives */ 4727 for (i=0; i<nrecvs; i++){ 4728 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4729 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4730 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4731 } 4732 4733 /* pack the outgoing message */ 4734 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 4735 sstartsj[0] = 0; rstartsj[0] = 0; 4736 len = 0; /* total length of j or a array to be sent */ 4737 k = 0; 4738 for (i=0; i<nsends; i++){ 4739 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4740 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4741 for (j=0; j<nrows; j++) { 4742 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4743 for (l=0; l<sbs; l++){ 4744 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4745 rowlen[j*sbs+l] = ncols; 4746 len += ncols; 4747 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4748 } 4749 k++; 4750 } 4751 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4752 sstartsj[i+1] = len; /* starting point of (i+1)-th 
outgoing msg in bufj and bufa */ 4753 } 4754 /* recvs and sends of i-array are completed */ 4755 i = nrecvs; 4756 while (i--) { 4757 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4758 } 4759 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4760 4761 /* allocate buffers for sending j and a arrays */ 4762 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4763 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4764 4765 /* create i-array of B_oth */ 4766 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4767 b_othi[0] = 0; 4768 len = 0; /* total length of j or a array to be received */ 4769 k = 0; 4770 for (i=0; i<nrecvs; i++){ 4771 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4772 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 4773 for (j=0; j<nrows; j++) { 4774 b_othi[k+1] = b_othi[k] + rowlen[j]; 4775 len += rowlen[j]; k++; 4776 } 4777 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4778 } 4779 4780 /* allocate space for j and a arrays of B_oth */ 4781 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4782 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 4783 4784 /* j-array */ 4785 /*---------*/ 4786 /* post receives of j-array */ 4787 for (i=0; i<nrecvs; i++){ 4788 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4789 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4790 } 4791 4792 /* pack the outgoing message j-array */ 4793 k = 0; 4794 for (i=0; i<nsends; i++){ 4795 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4796 bufJ = bufj+sstartsj[i]; 4797 for (j=0; j<nrows; j++) { 4798 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4799 for (ll=0; ll<sbs; ll++){ 4800 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4801 for (l=0; l<ncols; l++){ 4802 *bufJ++ = cols[l]; 4803 } 4804 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4805 } 4806 } 4807 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4808 } 4809 4810 /* recvs and sends of j-array are completed */ 4811 i = nrecvs; 4812 while (i--) { 4813 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4814 } 4815 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4816 } else if (scall == MAT_REUSE_MATRIX){ 4817 sstartsj = *startsj; 4818 rstartsj = *startsj_r; 4819 bufa = *bufa_ptr; 4820 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4821 b_otha = b_oth->a; 4822 } else { 4823 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 4824 } 4825 4826 /* a-array */ 4827 /*---------*/ 4828 /* post receives of a-array */ 4829 for (i=0; i<nrecvs; i++){ 4830 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4831 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4832 } 4833 4834 /* pack the outgoing message a-array */ 4835 k = 0; 4836 for (i=0; i<nsends; i++){ 4837 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4838 bufA = bufa+sstartsj[i]; 4839 for (j=0; j<nrows; j++) { 4840 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4841 for (ll=0; ll<sbs; ll++){ 4842 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4843 for (l=0; l<ncols; l++){ 4844 *bufA++ = vals[l]; 4845 } 4846 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4847 } 4848 } 4849 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4850 } 4851 /* recvs and sends of a-array are completed */ 4852 i = nrecvs; 4853 while (i--) { 4854 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4855 } 4856 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4857 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4858 4859 if (scall == MAT_INITIAL_MATRIX){ 4860 /* put together the new matrix */ 4861 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4862 4863 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4864 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4865 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4866 b_oth->free_a = PETSC_TRUE; 4867 b_oth->free_ij = PETSC_TRUE; 4868 b_oth->nonew = 0; 4869 4870 ierr = PetscFree(bufj);CHKERRQ(ierr); 4871 if (!startsj || !bufa_ptr){ 4872 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 4873 ierr = PetscFree(bufa);CHKERRQ(ierr); 4874 } else { 4875 *startsj = sstartsj; 4876 *startsj_r = rstartsj; 4877 *bufa_ptr = bufa; 4878 } 4879 } 4880 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4881 PetscFunctionReturn(0); 4882 } 4883 4884 #undef __FUNCT__ 4885 #define __FUNCT__ "MatGetCommunicationStructs" 4886 /*@C 4887 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4888 4889 Not Collective 4890 4891 Input Parameters: 4892 . A - The matrix in mpiaij format 4893 4894 Output Parameter: 4895 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4896 . 
colmap - A map from global column index to local index into lvec 4897 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4898 4899 Level: developer 4900 4901 @*/ 4902 #if defined (PETSC_USE_CTABLE) 4903 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4904 #else 4905 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4906 #endif 4907 { 4908 Mat_MPIAIJ *a; 4909 4910 PetscFunctionBegin; 4911 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 4912 PetscValidPointer(lvec, 2) 4913 PetscValidPointer(colmap, 3) 4914 PetscValidPointer(multScatter, 4) 4915 a = (Mat_MPIAIJ *) A->data; 4916 if (lvec) *lvec = a->lvec; 4917 if (colmap) *colmap = a->colmap; 4918 if (multScatter) *multScatter = a->Mvctx; 4919 PetscFunctionReturn(0); 4920 } 4921 4922 EXTERN_C_BEGIN 4923 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*); 4924 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*); 4925 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 4926 EXTERN_C_END 4927 4928 #undef __FUNCT__ 4929 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4930 /* 4931 Computes (B'*A')' since computing B*A directly is untenable 4932 4933 n p p 4934 ( ) ( ) ( ) 4935 m ( A ) * n ( B ) = m ( C ) 4936 ( ) ( ) ( ) 4937 4938 */ 4939 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4940 { 4941 PetscErrorCode ierr; 4942 Mat At,Bt,Ct; 4943 4944 PetscFunctionBegin; 4945 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4946 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4947 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4948 ierr = MatDestroy(At);CHKERRQ(ierr); 4949 ierr = MatDestroy(Bt);CHKERRQ(ierr); 4950 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4951 ierr = MatDestroy(Ct);CHKERRQ(ierr); 4952 PetscFunctionReturn(0); 4953 } 4954 4955 #undef __FUNCT__ 4956 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4957 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4958 { 4959 PetscErrorCode ierr; 4960 PetscInt m=A->rmap->n,n=B->cmap->n; 4961 Mat Cmat; 4962 4963 PetscFunctionBegin; 4964 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 4965 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 4966 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4967 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 4968 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 4969 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4970 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4971 *C = Cmat; 4972 PetscFunctionReturn(0); 4973 } 4974 4975 /* ----------------------------------------------------------------*/ 4976 #undef __FUNCT__ 4977 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 4978 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 4979 { 4980 PetscErrorCode ierr; 4981 4982 PetscFunctionBegin; 4983 if (scall == MAT_INITIAL_MATRIX){ 4984 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 4985 } 4986 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 4987 PetscFunctionReturn(0); 4988 } 4989 4990 
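/*
   Usage sketch (illustrative only; A, B and C below are hypothetical user matrices):
   with A stored as MPIDENSE and B as MPIAIJ, and A's column layout matching B's row
   layout, the routines above form C = A*B through the transpose identity
   C = (B^T * A^T)^T, e.g.

      Mat C;
      ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,1.0,&C);CHKERRQ(ierr);

   Because MatMatMultNumeric_MPIDense_MPIAIJ() builds both explicit transposes on
   every call, repeated products through this path are comparatively expensive.
*/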
EXTERN_C_BEGIN 4991 #if defined(PETSC_HAVE_MUMPS) 4992 extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*); 4993 #endif 4994 #if defined(PETSC_HAVE_PASTIX) 4995 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 4996 #endif 4997 #if defined(PETSC_HAVE_SUPERLU_DIST) 4998 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 4999 #endif 5000 #if defined(PETSC_HAVE_SPOOLES) 5001 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5002 #endif 5003 EXTERN_C_END 5004 5005 /*MC 5006 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5007 5008 Options Database Keys: 5009 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5010 5011 Level: beginner 5012 5013 .seealso: MatCreateMPIAIJ() 5014 M*/ 5015 5016 EXTERN_C_BEGIN 5017 #undef __FUNCT__ 5018 #define __FUNCT__ "MatCreate_MPIAIJ" 5019 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 5020 { 5021 Mat_MPIAIJ *b; 5022 PetscErrorCode ierr; 5023 PetscMPIInt size; 5024 5025 PetscFunctionBegin; 5026 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5027 5028 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5029 B->data = (void*)b; 5030 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5031 B->rmap->bs = 1; 5032 B->assembled = PETSC_FALSE; 5033 B->mapping = 0; 5034 5035 B->insertmode = NOT_SET_VALUES; 5036 b->size = size; 5037 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5038 5039 /* build cache for off array entries formed */ 5040 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5041 b->donotstash = PETSC_FALSE; 5042 b->colmap = 0; 5043 b->garray = 0; 5044 b->roworiented = PETSC_TRUE; 5045 5046 /* stuff used for matrix vector multiply */ 5047 b->lvec = PETSC_NULL; 5048 b->Mvctx = PETSC_NULL; 5049 5050 /* stuff for MatGetRow() */ 5051 b->rowindices = 0; 5052 b->rowvalues = 0; 5053 b->getrowactive = PETSC_FALSE; 5054 5055 #if defined(PETSC_HAVE_SPOOLES) 5056 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5057 "MatGetFactor_mpiaij_spooles", 5058 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5059 #endif 5060 #if defined(PETSC_HAVE_MUMPS) 5061 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5062 "MatGetFactor_mpiaij_mumps", 5063 MatGetFactor_mpiaij_mumps);CHKERRQ(ierr); 5064 #endif 5065 #if defined(PETSC_HAVE_PASTIX) 5066 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5067 "MatGetFactor_mpiaij_pastix", 5068 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5069 #endif 5070 #if defined(PETSC_HAVE_SUPERLU_DIST) 5071 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5072 "MatGetFactor_mpiaij_superlu_dist", 5073 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5074 #endif 5075 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5076 "MatStoreValues_MPIAIJ", 5077 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5078 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5079 "MatRetrieveValues_MPIAIJ", 5080 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5081 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5082 "MatGetDiagonalBlock_MPIAIJ", 5083 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5084 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5085 "MatIsTranspose_MPIAIJ", 
5086 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5087 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5088 "MatMPIAIJSetPreallocation_MPIAIJ", 5089 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5090 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5091 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5092 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5093 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5094 "MatDiagonalScaleLocal_MPIAIJ", 5095 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5096 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C", 5097 "MatConvert_MPIAIJ_MPICSRPERM", 5098 MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr); 5099 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C", 5100 "MatConvert_MPIAIJ_MPICRL", 5101 MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr); 5102 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5103 "MatConvert_MPIAIJ_MPISBAIJ", 5104 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5105 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5106 "MatMatMult_MPIDense_MPIAIJ", 5107 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5108 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5109 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5110 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5111 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5112 "MatMatMultNumeric_MPIDense_MPIAIJ", 5113 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5114 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5115 PetscFunctionReturn(0); 5116 } 5117 EXTERN_C_END 5118 5119 #undef __FUNCT__ 5120 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5121 /*@ 5122 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5123 and "off-diagonal" part of the matrix in CSR format. 5124 5125 Collective on MPI_Comm 5126 5127 Input Parameters: 5128 + comm - MPI communicator 5129 . m - number of local rows (Cannot be PETSC_DECIDE) 5130 . n - This value should be the same as the local size used in creating the 5131 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5132 calculated if N is given) For square matrices n is almost always m. 5133 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5134 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5135 . i - row indices for "diagonal" portion of matrix 5136 . j - column indices 5137 . a - matrix values 5138 . oi - row indices for "off-diagonal" portion of matrix 5139 . oj - column indices 5140 - oa - matrix values 5141 5142 Output Parameter: 5143 . mat - the matrix 5144 5145 Level: advanced 5146 5147 Notes: 5148 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 5149 5150 The i and j indices are 0 based 5151 5152 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5153 5154 This sets local rows and cannot be used to set off-processor values. 5155 5156 You cannot later use MatSetValues() to change values in this matrix. 
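   Example usage: a minimal two-process sketch where each process owns one row and one
   column of the 2x2 matrix [2 -1; -1 2]. The j indices address the diagonal block with
   local, 0-based columns, while the oj indices are global column indices (shown here for
   process 0; process 1 would use oj[0] = 0). The values are illustrative only, and because
   the arrays are not copied they must remain valid for the life of the matrix.
.vb
      PetscInt    i[2]  = {0,1}, j[1]  = {0};
      PetscInt    oi[2] = {0,1}, oj[1] = {1};
      PetscScalar a[1]  = {2.0}, oa[1] = {-1.0};
      Mat         A;

      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve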
5157 5158 .keywords: matrix, aij, compressed row, sparse, parallel 5159 5160 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5161 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5162 @*/ 5163 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5164 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5165 { 5166 PetscErrorCode ierr; 5167 Mat_MPIAIJ *maij; 5168 5169 PetscFunctionBegin; 5170 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5171 if (i[0]) { 5172 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5173 } 5174 if (oi[0]) { 5175 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5176 } 5177 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5178 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5179 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5180 maij = (Mat_MPIAIJ*) (*mat)->data; 5181 maij->donotstash = PETSC_TRUE; 5182 (*mat)->preallocated = PETSC_TRUE; 5183 5184 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5185 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5186 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5187 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5188 5189 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5190 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5191 5192 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5193 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5194 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5195 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5196 5197 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5198 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5199 PetscFunctionReturn(0); 5200 } 5201 5202 /* 5203 Special version for direct calls from Fortran 5204 */ 5205 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5206 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5207 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5208 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5209 #endif 5210 5211 /* Change these macros so can be used in void function */ 5212 #undef CHKERRQ 5213 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5214 #undef SETERRQ2 5215 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5216 #undef SETERRQ 5217 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5218 5219 EXTERN_C_BEGIN 5220 #undef __FUNCT__ 5221 #define __FUNCT__ "matsetvaluesmpiaij_" 5222 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5223 { 5224 Mat mat = *mmat; 5225 PetscInt m = *mm, n = *mn; 5226 InsertMode addv = *maddv; 5227 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5228 PetscScalar value; 5229 PetscErrorCode ierr; 5230 5231 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5232 if (mat->insertmode == NOT_SET_VALUES) { 5233 mat->insertmode = addv; 5234 } 5235 #if defined(PETSC_USE_DEBUG) 5236 else if (mat->insertmode != addv) { 5237 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5238 } 5239 #endif 5240 { 5241 PetscInt i,j,rstart = 
mat->rmap->rstart,rend = mat->rmap->rend; 5242 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5243 PetscTruth roworiented = aij->roworiented; 5244 5245 /* Some Variables required in the macro */ 5246 Mat A = aij->A; 5247 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5248 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5249 MatScalar *aa = a->a; 5250 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5251 Mat B = aij->B; 5252 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5253 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5254 MatScalar *ba = b->a; 5255 5256 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5257 PetscInt nonew = a->nonew; 5258 MatScalar *ap1,*ap2; 5259 5260 PetscFunctionBegin; 5261 for (i=0; i<m; i++) { 5262 if (im[i] < 0) continue; 5263 #if defined(PETSC_USE_DEBUG) 5264 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5265 #endif 5266 if (im[i] >= rstart && im[i] < rend) { 5267 row = im[i] - rstart; 5268 lastcol1 = -1; 5269 rp1 = aj + ai[row]; 5270 ap1 = aa + ai[row]; 5271 rmax1 = aimax[row]; 5272 nrow1 = ailen[row]; 5273 low1 = 0; 5274 high1 = nrow1; 5275 lastcol2 = -1; 5276 rp2 = bj + bi[row]; 5277 ap2 = ba + bi[row]; 5278 rmax2 = bimax[row]; 5279 nrow2 = bilen[row]; 5280 low2 = 0; 5281 high2 = nrow2; 5282 5283 for (j=0; j<n; j++) { 5284 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5285 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5286 if (in[j] >= cstart && in[j] < cend){ 5287 col = in[j] - cstart; 5288 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5289 } else if (in[j] < 0) continue; 5290 #if defined(PETSC_USE_DEBUG) 5291 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 5292 #endif 5293 else { 5294 if (mat->was_assembled) { 5295 if (!aij->colmap) { 5296 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5297 } 5298 #if defined (PETSC_USE_CTABLE) 5299 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5300 col--; 5301 #else 5302 col = aij->colmap[in[j]] - 1; 5303 #endif 5304 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5305 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5306 col = in[j]; 5307 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5308 B = aij->B; 5309 b = (Mat_SeqAIJ*)B->data; 5310 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5311 rp2 = bj + bi[row]; 5312 ap2 = ba + bi[row]; 5313 rmax2 = bimax[row]; 5314 nrow2 = bilen[row]; 5315 low2 = 0; 5316 high2 = nrow2; 5317 bm = aij->B->rmap->n; 5318 ba = b->a; 5319 } 5320 } else col = in[j]; 5321 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5322 } 5323 } 5324 } else { 5325 if (!aij->donotstash) { 5326 if (roworiented) { 5327 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5328 } else { 5329 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5330 } 5331 } 5332 } 5333 }} 5334 PetscFunctionReturnVoid(); 5335 } 5336 EXTERN_C_END 5337 5338
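/*
   Illustrative note (a sketch, not part of the library build): the Fortran-callable
   routine above follows the same insertion path as MatSetValues_MPIAIJ -- entries whose
   rows are owned locally go directly into the diagonal (aij->A) or off-diagonal (aij->B)
   blocks, while entries for rows owned by other processes are stashed and exchanged
   during assembly.  The equivalent C-side usage, with mat a previously created and
   preallocated MPIAIJ matrix and row/col hypothetical global indices, is:

      PetscInt    row = 0, col = 0;
      PetscScalar v   = 1.0;
      ierr = MatSetValues(mat,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/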